]>
Commit | Line | Data |
---|---|---|
68dd551b AF |
1 | diff -ruN linux.orig/Documentation/Configure.help linux-2.2.16/Documentation/Configure.help |
2 | --- linux.orig/Documentation/Configure.help Wed Jun 7 23:26:42 2000 | |
3 | +++ linux-2.2.16/Documentation/Configure.help Fri Jun 9 11:37:48 2000 | |
4 | @@ -961,6 +961,13 @@ | |
5 | ||
6 | If unsure, say N. | |
7 | ||
8 | +Autodetect RAID partitions | |
9 | +CONFIG_AUTODETECT_RAID | |
10 | + This feature lets the kernel detect RAID partitions on bootup. | |
11 | + An autodetect RAID partition is a normal partition with partition | |
12 | + type 0xfd. Use this if you want to boot RAID devices, or want to | |
13 | + run them automatically. | |
14 | + | |
15 | Linear (append) mode | |
16 | CONFIG_MD_LINEAR | |
17 | If you say Y here, then your multiple devices driver will be able to | |
18 | @@ -1039,6 +1046,21 @@ | |
19 | Documentation/modules.txt. | |
20 | ||
21 | If unsure, say Y. | |
22 | + | |
23 | +Translucent Block Device Support (EXPERIMENTAL) | |
24 | +CONFIG_MD_TRANSLUCENT | |
25 | + DO NOT USE THIS STUFF YET! | |
26 | + | |
27 | + currently there is only a placeholder there as the implementation | |
28 | + is not yet usable. | |
29 | + | |
30 | +Hierarchical Storage Management support (EXPERIMENTAL) | |
31 | +CONFIG_MD_HSM | |
32 | + DO NOT USE THIS STUFF YET! | |
33 | + | |
34 | + i have released this so people can comment on the architecture, | |
35 | + but user-space tools are still unusable so there is nothing much | |
36 | + you can do with this. | |
37 | ||
38 | Boot support (linear, striped) | |
39 | CONFIG_MD_BOOT | |
40 | diff -ruN linux.orig/arch/i386/defconfig linux-2.2.16/arch/i386/defconfig | |
41 | --- linux.orig/arch/i386/defconfig Thu May 4 02:16:30 2000 | |
42 | +++ linux-2.2.16/arch/i386/defconfig Fri Jun 9 11:37:45 2000 | |
43 | @@ -93,7 +93,15 @@ | |
44 | # | |
45 | # CONFIG_BLK_DEV_LOOP is not set | |
46 | # CONFIG_BLK_DEV_NBD is not set | |
47 | -# CONFIG_BLK_DEV_MD is not set | |
48 | +CONFIG_BLK_DEV_MD=y | |
49 | +CONFIG_AUTODETECT_RAID=y | |
50 | +CONFIG_MD_TRANSLUCENT=y | |
51 | +CONFIG_MD_LINEAR=y | |
52 | +CONFIG_MD_STRIPED=y | |
53 | +CONFIG_MD_MIRRORING=y | |
54 | +CONFIG_MD_RAID5=y | |
55 | +CONFIG_MD_BOOT=y | |
56 | +CONFIG_BLK_DEV_HSM=y | |
57 | # CONFIG_BLK_DEV_RAM is not set | |
58 | # CONFIG_BLK_DEV_XD is not set | |
59 | # CONFIG_BLK_DEV_DAC960 is not set | |
60 | diff -ruN linux.orig/arch/sparc/config.in linux-2.2.16/arch/sparc/config.in | |
61 | --- linux.orig/arch/sparc/config.in Wed Jun 7 23:26:42 2000 | |
62 | +++ linux-2.2.16/arch/sparc/config.in Fri Jun 9 11:37:45 2000 | |
63 | @@ -1,4 +1,4 @@ | |
64 | -# $Id$ | |
65 | +# $Id$ | |
66 | # For a description of the syntax of this configuration file, | |
67 | # see Documentation/kbuild/config-language.txt. | |
68 | # | |
69 | @@ -85,10 +85,16 @@ | |
70 | ||
71 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
72 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
73 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
74 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
75 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
76 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
77 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
78 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
79 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
80 | +fi | |
81 | +if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
82 | + bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
83 | fi | |
84 | ||
85 | tristate 'RAM disk support' CONFIG_BLK_DEV_RAM | |
86 | diff -ruN linux.orig/arch/sparc/defconfig linux-2.2.16/arch/sparc/defconfig | |
87 | --- linux.orig/arch/sparc/defconfig Thu May 4 02:16:32 2000 | |
88 | +++ linux-2.2.16/arch/sparc/defconfig Fri Jun 9 11:37:45 2000 | |
89 | @@ -88,10 +88,13 @@ | |
90 | # | |
91 | CONFIG_BLK_DEV_FD=y | |
92 | CONFIG_BLK_DEV_MD=y | |
93 | +# CONFIG_AUTODETECT_RAID is not set | |
94 | CONFIG_MD_LINEAR=m | |
95 | CONFIG_MD_STRIPED=m | |
96 | CONFIG_MD_MIRRORING=m | |
97 | CONFIG_MD_RAID5=m | |
98 | +# CONFIG_MD_TRANSLUCENT is not set | |
99 | +# CONFIG_MD_HSM is not set | |
100 | CONFIG_BLK_DEV_RAM=y | |
101 | CONFIG_BLK_DEV_INITRD=y | |
102 | CONFIG_BLK_DEV_LOOP=m | |
103 | diff -ruN linux.orig/arch/sparc64/config.in linux-2.2.16/arch/sparc64/config.in | |
104 | --- linux.orig/arch/sparc64/config.in Wed Jun 7 23:26:42 2000 | |
105 | +++ linux-2.2.16/arch/sparc64/config.in Fri Jun 9 11:37:48 2000 | |
106 | @@ -97,10 +97,16 @@ | |
107 | ||
108 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
109 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
110 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
111 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
112 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
113 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
114 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
115 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
116 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
117 | +fi | |
118 | +if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
119 | + bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
120 | fi | |
121 | ||
122 | tristate 'RAM disk support' CONFIG_BLK_DEV_RAM | |
123 | diff -ruN linux.orig/arch/sparc64/defconfig linux-2.2.16/arch/sparc64/defconfig | |
124 | --- linux.orig/arch/sparc64/defconfig Wed Jun 7 23:26:42 2000 | |
125 | +++ linux-2.2.16/arch/sparc64/defconfig Fri Jun 9 11:37:48 2000 | |
126 | @@ -107,10 +107,13 @@ | |
127 | # | |
128 | CONFIG_BLK_DEV_FD=y | |
129 | CONFIG_BLK_DEV_MD=y | |
130 | +# CONFIG_AUTODETECT_RAID is not set | |
131 | CONFIG_MD_LINEAR=m | |
132 | CONFIG_MD_STRIPED=m | |
133 | CONFIG_MD_MIRRORING=m | |
134 | CONFIG_MD_RAID5=m | |
135 | +# CONFIG_MD_TRANSLUCENT is not set | |
136 | +# CONFIG_MD_HSM is not set | |
137 | CONFIG_BLK_DEV_RAM=y | |
138 | CONFIG_BLK_DEV_INITRD=y | |
139 | CONFIG_BLK_DEV_LOOP=m | |
140 | diff -ruN linux.orig/drivers/block/Config.in linux-2.2.16/drivers/block/Config.in | |
141 | --- linux.orig/drivers/block/Config.in Wed Jun 7 23:26:42 2000 | |
142 | +++ linux-2.2.16/drivers/block/Config.in Fri Jun 9 11:37:45 2000 | |
143 | @@ -102,10 +102,13 @@ | |
144 | fi | |
145 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
146 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
147 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
148 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
149 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
150 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
151 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
152 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
153 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
154 | fi | |
155 | if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
156 | bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
157 | diff -ruN linux.orig/drivers/block/Makefile linux-2.2.16/drivers/block/Makefile | |
158 | --- linux.orig/drivers/block/Makefile Thu May 4 02:16:32 2000 | |
159 | +++ linux-2.2.16/drivers/block/Makefile Fri Jun 9 11:37:45 2000 | |
160 | @@ -282,10 +282,28 @@ | |
161 | endif | |
162 | ||
163 | ifeq ($(CONFIG_MD_RAID5),y) | |
164 | +LX_OBJS += xor.o | |
165 | L_OBJS += raid5.o | |
166 | else | |
167 | ifeq ($(CONFIG_MD_RAID5),m) | |
168 | + LX_OBJS += xor.o | |
169 | M_OBJS += raid5.o | |
170 | + endif | |
171 | +endif | |
172 | + | |
173 | +ifeq ($(CONFIG_MD_TRANSLUCENT),y) | |
174 | +L_OBJS += translucent.o | |
175 | +else | |
176 | + ifeq ($(CONFIG_MD_TRANSLUCENT),m) | |
177 | + M_OBJS += translucent.o | |
178 | + endif | |
179 | +endif | |
180 | + | |
181 | +ifeq ($(CONFIG_MD_HSM),y) | |
182 | +L_OBJS += hsm.o | |
183 | +else | |
184 | + ifeq ($(CONFIG_MD_HSM),m) | |
185 | + M_OBJS += hsm.o | |
186 | endif | |
187 | endif | |
188 | ||
189 | diff -ruN linux.orig/drivers/block/genhd.c linux-2.2.16/drivers/block/genhd.c | |
190 | --- linux.orig/drivers/block/genhd.c Wed Jun 7 23:26:42 2000 | |
191 | +++ linux-2.2.16/drivers/block/genhd.c Fri Jun 9 11:37:45 2000 | |
192 | @@ -28,6 +28,7 @@ | |
193 | #include <linux/string.h> | |
194 | #include <linux/blk.h> | |
195 | #include <linux/init.h> | |
196 | +#include <linux/raid/md.h> | |
197 | ||
198 | #include <asm/system.h> | |
199 | #include <asm/byteorder.h> | |
200 | @@ -1649,6 +1650,9 @@ | |
201 | else | |
202 | #endif | |
203 | rd_load(); | |
204 | +#endif | |
205 | +#ifdef CONFIG_BLK_DEV_MD | |
206 | + autodetect_raid(); | |
207 | #endif | |
208 | #ifdef CONFIG_MD_BOOT | |
209 | md_setup_drive(); | |
210 | diff -ruN linux.orig/drivers/block/hsm.c linux-2.2.16/drivers/block/hsm.c | |
211 | --- linux.orig/drivers/block/hsm.c Thu Jan 1 01:00:00 1970 | |
212 | +++ linux-2.2.16/drivers/block/hsm.c Fri Jun 9 11:37:45 2000 | |
213 | @@ -0,0 +1,840 @@ | |
214 | +/* | |
215 | + hsm.c : HSM RAID driver for Linux | |
216 | + Copyright (C) 1998 Ingo Molnar | |
217 | + | |
218 | + HSM mode management functions. | |
219 | + | |
220 | + This program is free software; you can redistribute it and/or modify | |
221 | + it under the terms of the GNU General Public License as published by | |
222 | + the Free Software Foundation; either version 2, or (at your option) | |
223 | + any later version. | |
224 | + | |
225 | + You should have received a copy of the GNU General Public License | |
226 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
227 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
228 | +*/ | |
229 | + | |
230 | +#include <linux/module.h> | |
231 | + | |
232 | +#include <linux/raid/md.h> | |
233 | +#include <linux/malloc.h> | |
234 | + | |
235 | +#include <linux/raid/hsm.h> | |
236 | +#include <linux/blk.h> | |
237 | + | |
238 | +#define MAJOR_NR MD_MAJOR | |
239 | +#define MD_DRIVER | |
240 | +#define MD_PERSONALITY | |
241 | + | |
242 | + | |
243 | +#define DEBUG_HSM 1 | |
244 | + | |
245 | +#if DEBUG_HSM | |
246 | +#define dprintk(x,y...) printk(x,##y) | |
247 | +#else | |
248 | +#define dprintk(x,y...) do { } while (0) | |
249 | +#endif | |
250 | + | |
251 | +void print_bh(struct buffer_head *bh) | |
252 | +{ | |
253 | + dprintk("bh %p: %lx %lx %x %x %lx %p %lx %p %x %p %x %lx\n", bh, | |
254 | + bh->b_blocknr, bh->b_size, bh->b_dev, bh->b_rdev, | |
255 | + bh->b_rsector, bh->b_this_page, bh->b_state, | |
256 | + bh->b_next_free, bh->b_count, bh->b_data, | |
257 | + bh->b_list, bh->b_flushtime | |
258 | + ); | |
259 | +} | |
260 | + | |
261 | +static int check_bg (pv_t *pv, pv_block_group_t * bg) | |
262 | +{ | |
263 | + int i, free = 0; | |
264 | + | |
265 | + dprintk("checking bg ...\n"); | |
266 | + | |
267 | + for (i = 0; i < pv->pv_sb->pv_bg_size-1; i++) { | |
268 | + if (pv_pptr_free(bg->blocks + i)) { | |
269 | + free++; | |
270 | + if (test_bit(i, bg->used_bitmap)) { | |
271 | + printk("hm, bit %d set?\n", i); | |
272 | + } | |
273 | + } else { | |
274 | + if (!test_bit(i, bg->used_bitmap)) { | |
275 | + printk("hm, bit %d not set?\n", i); | |
276 | + } | |
277 | + } | |
278 | + } | |
279 | + dprintk("%d free blocks in bg ...\n", free); | |
280 | + return free; | |
281 | +} | |
282 | + | |
283 | +static void get_bg (pv_t *pv, pv_bg_desc_t *desc, int nr) | |
284 | +{ | |
285 | + unsigned int bg_pos = nr * pv->pv_sb->pv_bg_size + 2; | |
286 | + struct buffer_head *bh; | |
287 | + | |
288 | + dprintk("... getting BG at %u ...\n", bg_pos); | |
289 | + | |
290 | + bh = bread (pv->dev, bg_pos, HSM_BLOCKSIZE); | |
291 | + if (!bh) { | |
292 | + MD_BUG(); | |
293 | + return; | |
294 | + } | |
295 | + desc->bg = (pv_block_group_t *) bh->b_data; | |
296 | + desc->free_blocks = check_bg(pv, desc->bg); | |
297 | +} | |
298 | + | |
299 | +static int find_free_block (lv_t *lv, pv_t *pv, pv_bg_desc_t *desc, int nr, | |
300 | + unsigned int lblock, lv_lptr_t * index) | |
301 | +{ | |
302 | + int i; | |
303 | + | |
304 | + for (i = 0; i < pv->pv_sb->pv_bg_size-1; i++) { | |
305 | + pv_pptr_t * bptr = desc->bg->blocks + i; | |
306 | + if (pv_pptr_free(bptr)) { | |
307 | + unsigned int bg_pos = nr * pv->pv_sb->pv_bg_size + 2; | |
308 | + | |
309 | + if (test_bit(i, desc->bg->used_bitmap)) { | |
310 | + MD_BUG(); | |
311 | + continue; | |
312 | + } | |
313 | + bptr->u.used.owner.log_id = lv->log_id; | |
314 | + bptr->u.used.owner.log_index = lblock; | |
315 | + index->data.phys_nr = pv->phys_nr; | |
316 | + index->data.phys_block = bg_pos + i + 1; | |
317 | + set_bit(i, desc->bg->used_bitmap); | |
318 | + desc->free_blocks--; | |
319 | + dprintk(".....free blocks left in bg %p: %d\n", | |
320 | + desc->bg, desc->free_blocks); | |
321 | + return 0; | |
322 | + } | |
323 | + } | |
324 | + return -ENOSPC; | |
325 | +} | |
326 | + | |
327 | +static int __get_free_block (lv_t *lv, pv_t *pv, | |
328 | + unsigned int lblock, lv_lptr_t * index) | |
329 | +{ | |
330 | + int i; | |
331 | + | |
332 | + dprintk("trying to get free block for lblock %d ...\n", lblock); | |
333 | + | |
334 | + for (i = 0; i < pv->pv_sb->pv_block_groups; i++) { | |
335 | + pv_bg_desc_t *desc = pv->bg_array + i; | |
336 | + | |
337 | + dprintk("looking at desc #%d (%p)...\n", i, desc->bg); | |
338 | + if (!desc->bg) | |
339 | + get_bg(pv, desc, i); | |
340 | + | |
341 | + if (desc->bg && desc->free_blocks) | |
342 | + return find_free_block(lv, pv, desc, i, | |
343 | + lblock, index); | |
344 | + } | |
345 | + dprintk("hsm: pv %s full!\n", partition_name(pv->dev)); | |
346 | + return -ENOSPC; | |
347 | +} | |
348 | + | |
349 | +static int get_free_block (lv_t *lv, unsigned int lblock, lv_lptr_t * index) | |
350 | +{ | |
351 | + int err; | |
352 | + | |
353 | + if (!lv->free_indices) | |
354 | + return -ENOSPC; | |
355 | + | |
356 | + /* fix me */ | |
357 | + err = __get_free_block(lv, lv->vg->pv_array + 0, lblock, index); | |
358 | + | |
359 | + if (err || !index->data.phys_block) { | |
360 | + MD_BUG(); | |
361 | + return -ENOSPC; | |
362 | + } | |
363 | + | |
364 | + lv->free_indices--; | |
365 | + | |
366 | + return 0; | |
367 | +} | |
368 | + | |
369 | +/* | |
370 | + * fix me: wordsize assumptions ... | |
371 | + */ | |
372 | +#define INDEX_BITS 8 | |
373 | +#define INDEX_DEPTH (32/INDEX_BITS) | |
374 | +#define INDEX_MASK ((1<<INDEX_BITS) - 1) | |
375 | + | |
376 | +static void print_index_list (lv_t *lv, lv_lptr_t *index) | |
377 | +{ | |
378 | + lv_lptr_t *tmp; | |
379 | + int i; | |
380 | + | |
381 | + dprintk("... block <%u,%u,%x> [.", index->data.phys_nr, | |
382 | + index->data.phys_block, index->cpu_addr); | |
383 | + | |
384 | + tmp = index_child(index); | |
385 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
386 | + if (index_block(lv, tmp)) | |
387 | + dprintk("(%d->%d)", i, index_block(lv, tmp)); | |
388 | + tmp++; | |
389 | + } | |
390 | + dprintk(".]\n"); | |
391 | +} | |
392 | + | |
393 | +static int read_index_group (lv_t *lv, lv_lptr_t *index) | |
394 | +{ | |
395 | + lv_lptr_t *index_group, *tmp; | |
396 | + struct buffer_head *bh; | |
397 | + int i; | |
398 | + | |
399 | + dprintk("reading index group <%s:%d>\n", | |
400 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
401 | + | |
402 | + bh = bread(index_dev(lv, index), index_block(lv, index), HSM_BLOCKSIZE); | |
403 | + if (!bh) { | |
404 | + MD_BUG(); | |
405 | + return -EIO; | |
406 | + } | |
407 | + if (!buffer_uptodate(bh)) | |
408 | + MD_BUG(); | |
409 | + | |
410 | + index_group = (lv_lptr_t *) bh->b_data; | |
411 | + tmp = index_group; | |
412 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
413 | + if (index_block(lv, tmp)) { | |
414 | + dprintk("index group has BLOCK %d, non-present.\n", i); | |
415 | + tmp->cpu_addr = 0; | |
416 | + } | |
417 | + tmp++; | |
418 | + } | |
419 | + index->cpu_addr = ptr_to_cpuaddr(index_group); | |
420 | + | |
421 | + dprintk("have read index group %p at block %d.\n", | |
422 | + index_group, index_block(lv, index)); | |
423 | + print_index_list(lv, index); | |
424 | + | |
425 | + return 0; | |
426 | +} | |
427 | + | |
428 | +static int alloc_index_group (lv_t *lv, unsigned int lblock, lv_lptr_t * index) | |
429 | +{ | |
430 | + struct buffer_head *bh; | |
431 | + lv_lptr_t * index_group; | |
432 | + | |
433 | + if (get_free_block(lv, lblock, index)) | |
434 | + return -ENOSPC; | |
435 | + | |
436 | + dprintk("creating block for index group <%s:%d>\n", | |
437 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
438 | + | |
439 | + bh = getblk(index_dev(lv, index), | |
440 | + index_block(lv, index), HSM_BLOCKSIZE); | |
441 | + | |
442 | + index_group = (lv_lptr_t *) bh->b_data; | |
443 | + md_clear_page(index_group); | |
444 | + mark_buffer_uptodate(bh, 1); | |
445 | + | |
446 | + index->cpu_addr = ptr_to_cpuaddr(index_group); | |
447 | + | |
448 | + dprintk("allocated index group %p at block %d.\n", | |
449 | + index_group, index_block(lv, index)); | |
450 | + return 0; | |
451 | +} | |
452 | + | |
453 | +static lv_lptr_t * alloc_fixed_index (lv_t *lv, unsigned int lblock) | |
454 | +{ | |
455 | + lv_lptr_t * index = index_child(&lv->root_index); | |
456 | + int idx, l; | |
457 | + | |
458 | + for (l = INDEX_DEPTH-1; l >= 0; l--) { | |
459 | + idx = (lblock >> (INDEX_BITS*l)) & INDEX_MASK; | |
460 | + index += idx; | |
461 | + if (!l) | |
462 | + break; | |
463 | + if (!index_present(index)) { | |
464 | + dprintk("no group, level %u, pos %u\n", l, idx); | |
465 | + if (alloc_index_group(lv, lblock, index)) | |
466 | + return NULL; | |
467 | + } | |
468 | + index = index_child(index); | |
469 | + } | |
470 | + if (!index_block(lv,index)) { | |
471 | + dprintk("no data, pos %u\n", idx); | |
472 | + if (get_free_block(lv, lblock, index)) | |
473 | + return NULL; | |
474 | + return index; | |
475 | + } | |
476 | + MD_BUG(); | |
477 | + return index; | |
478 | +} | |
479 | + | |
480 | +static lv_lptr_t * find_index (lv_t *lv, unsigned int lblock) | |
481 | +{ | |
482 | + lv_lptr_t * index = index_child(&lv->root_index); | |
483 | + int idx, l; | |
484 | + | |
485 | + for (l = INDEX_DEPTH-1; l >= 0; l--) { | |
486 | + idx = (lblock >> (INDEX_BITS*l)) & INDEX_MASK; | |
487 | + index += idx; | |
488 | + if (!l) | |
489 | + break; | |
490 | + if (index_free(index)) | |
491 | + return NULL; | |
492 | + if (!index_present(index)) | |
493 | + read_index_group(lv, index); | |
494 | + if (!index_present(index)) { | |
495 | + MD_BUG(); | |
496 | + return NULL; | |
497 | + } | |
498 | + index = index_child(index); | |
499 | + } | |
500 | + if (!index_block(lv,index)) | |
501 | + return NULL; | |
502 | + return index; | |
503 | +} | |
504 | + | |
505 | +static int read_root_index(lv_t *lv) | |
506 | +{ | |
507 | + int err; | |
508 | + lv_lptr_t *index = &lv->root_index; | |
509 | + | |
510 | + if (!index_block(lv, index)) { | |
511 | + printk("LV has no root index yet, creating.\n"); | |
512 | + | |
513 | + err = alloc_index_group (lv, 0, index); | |
514 | + if (err) { | |
515 | + printk("could not create index group, err:%d\n", err); | |
516 | + return err; | |
517 | + } | |
518 | + lv->vg->vg_sb->lv_array[lv->log_id].lv_root_idx = | |
519 | + lv->root_index.data; | |
520 | + } else { | |
521 | + printk("LV already has a root index.\n"); | |
522 | + printk("... at <%s:%d>.\n", | |
523 | + partition_name(index_dev(lv, index)), | |
524 | + index_block(lv, index)); | |
525 | + | |
526 | + read_index_group(lv, index); | |
527 | + } | |
528 | + return 0; | |
529 | +} | |
530 | + | |
531 | +static int init_pv(pv_t *pv) | |
532 | +{ | |
533 | + struct buffer_head *bh; | |
534 | + pv_sb_t *pv_sb; | |
535 | + | |
536 | + bh = bread (pv->dev, 0, HSM_BLOCKSIZE); | |
537 | + if (!bh) { | |
538 | + MD_BUG(); | |
539 | + return -1; | |
540 | + } | |
541 | + | |
542 | + pv_sb = (pv_sb_t *) bh->b_data; | |
543 | + pv->pv_sb = pv_sb; | |
544 | + | |
545 | + if (pv_sb->pv_magic != HSM_PV_SB_MAGIC) { | |
546 | + printk("%s is not a PV, has magic %x instead of %x!\n", | |
547 | + partition_name(pv->dev), pv_sb->pv_magic, | |
548 | + HSM_PV_SB_MAGIC); | |
549 | + return -1; | |
550 | + } | |
551 | + printk("%s detected as a valid PV (#%d).\n", partition_name(pv->dev), | |
552 | + pv->phys_nr); | |
553 | + printk("... created under HSM version %d.%d.%d, at %x.\n", | |
554 | + pv_sb->pv_major, pv_sb->pv_minor, pv_sb->pv_patch, pv_sb->pv_ctime); | |
555 | + printk("... total # of blocks: %d (%d left unallocated).\n", | |
556 | + pv_sb->pv_total_size, pv_sb->pv_blocks_left); | |
557 | + | |
558 | + printk("... block size: %d bytes.\n", pv_sb->pv_block_size); | |
559 | + printk("... block descriptor size: %d bytes.\n", pv_sb->pv_pptr_size); | |
560 | + printk("... block group size: %d blocks.\n", pv_sb->pv_bg_size); | |
561 | + printk("... # of block groups: %d.\n", pv_sb->pv_block_groups); | |
562 | + | |
563 | + if (pv_sb->pv_block_groups*sizeof(pv_bg_desc_t) > PAGE_SIZE) { | |
564 | + MD_BUG(); | |
565 | + return 1; | |
566 | + } | |
567 | + pv->bg_array = (pv_bg_desc_t *)__get_free_page(GFP_KERNEL); | |
568 | + if (!pv->bg_array) { | |
569 | + MD_BUG(); | |
570 | + return 1; | |
571 | + } | |
572 | + memset(pv->bg_array, 0, PAGE_SIZE); | |
573 | + | |
574 | + return 0; | |
575 | +} | |
576 | + | |
577 | +static int free_pv(pv_t *pv) | |
578 | +{ | |
579 | + struct buffer_head *bh; | |
580 | + | |
581 | + dprintk("freeing PV %d ...\n", pv->phys_nr); | |
582 | + | |
583 | + if (pv->bg_array) { | |
584 | + int i; | |
585 | + | |
586 | + dprintk(".... freeing BGs ...\n"); | |
587 | + for (i = 0; i < pv->pv_sb->pv_block_groups; i++) { | |
588 | + unsigned int bg_pos = i * pv->pv_sb->pv_bg_size + 2; | |
589 | + pv_bg_desc_t *desc = pv->bg_array + i; | |
590 | + | |
591 | + if (desc->bg) { | |
592 | + dprintk(".... freeing BG %d ...\n", i); | |
593 | + bh = getblk (pv->dev, bg_pos, HSM_BLOCKSIZE); | |
594 | + mark_buffer_dirty(bh, 1); | |
595 | + brelse(bh); | |
596 | + brelse(bh); | |
597 | + } | |
598 | + } | |
599 | + free_page((unsigned long)pv->bg_array); | |
600 | + } else | |
601 | + MD_BUG(); | |
602 | + | |
603 | + bh = getblk (pv->dev, 0, HSM_BLOCKSIZE); | |
604 | + if (!bh) { | |
605 | + MD_BUG(); | |
606 | + return -1; | |
607 | + } | |
608 | + mark_buffer_dirty(bh, 1); | |
609 | + brelse(bh); | |
610 | + brelse(bh); | |
611 | + | |
612 | + return 0; | |
613 | +} | |
614 | + | |
615 | +struct semaphore hsm_sem = MUTEX; | |
616 | + | |
617 | +#define HSM_SECTORS (HSM_BLOCKSIZE/512) | |
618 | + | |
619 | +static int hsm_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
620 | + unsigned long *rsector, unsigned long bsectors) | |
621 | +{ | |
622 | + lv_t *lv = kdev_to_lv(dev); | |
623 | + lv_lptr_t *index; | |
624 | + unsigned int lblock = *rsector / HSM_SECTORS; | |
625 | + unsigned int offset = *rsector % HSM_SECTORS; | |
626 | + int err = -EIO; | |
627 | + | |
628 | + if (!lv) { | |
629 | + printk("HSM: md%d not a Logical Volume!\n", mdidx(mddev)); | |
630 | + goto out; | |
631 | + } | |
632 | + if (offset + bsectors > HSM_SECTORS) { | |
633 | + MD_BUG(); | |
634 | + goto out; | |
635 | + } | |
636 | + down(&hsm_sem); | |
637 | + index = find_index(lv, lblock); | |
638 | + if (!index) { | |
639 | + printk("no block %u yet ... allocating\n", lblock); | |
640 | + index = alloc_fixed_index(lv, lblock); | |
641 | + } | |
642 | + | |
643 | + err = 0; | |
644 | + | |
645 | + printk(" %u <%s : %ld(%ld)> -> ", lblock, | |
646 | + partition_name(*rdev), *rsector, bsectors); | |
647 | + | |
648 | + *rdev = index_dev(lv, index); | |
649 | + *rsector = index_block(lv, index) * HSM_SECTORS + offset; | |
650 | + | |
651 | + printk(" <%s : %ld> %u\n", | |
652 | + partition_name(*rdev), *rsector, index_block(lv, index)); | |
653 | + | |
654 | + up(&hsm_sem); | |
655 | +out: | |
656 | + return err; | |
657 | +} | |
658 | + | |
659 | +static void free_index (lv_t *lv, lv_lptr_t * index) | |
660 | +{ | |
661 | + struct buffer_head *bh; | |
662 | + | |
663 | + printk("tryin to get cached block for index group <%s:%d>\n", | |
664 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
665 | + | |
666 | + bh = getblk(index_dev(lv, index), index_block(lv, index),HSM_BLOCKSIZE); | |
667 | + | |
668 | + printk("....FREEING "); | |
669 | + print_index_list(lv, index); | |
670 | + | |
671 | + if (bh) { | |
672 | + if (!buffer_uptodate(bh)) | |
673 | + MD_BUG(); | |
674 | + if ((lv_lptr_t *)bh->b_data != index_child(index)) { | |
675 | + printk("huh? b_data is %p, index content is %p.\n", | |
676 | + bh->b_data, index_child(index)); | |
677 | + } else | |
678 | + printk("good, b_data == index content == %p.\n", | |
679 | + index_child(index)); | |
680 | + printk("b_count == %d, writing.\n", bh->b_count); | |
681 | + mark_buffer_dirty(bh, 1); | |
682 | + brelse(bh); | |
683 | + brelse(bh); | |
684 | + printk("done.\n"); | |
685 | + } else { | |
686 | + printk("FAILED!\n"); | |
687 | + } | |
688 | + print_index_list(lv, index); | |
689 | + index_child(index) = NULL; | |
690 | +} | |
691 | + | |
692 | +static void free_index_group (lv_t *lv, int level, lv_lptr_t * index_0) | |
693 | +{ | |
694 | + char dots [3*8]; | |
695 | + lv_lptr_t * index; | |
696 | + int i, nr_dots; | |
697 | + | |
698 | + nr_dots = (INDEX_DEPTH-level)*3; | |
699 | + memcpy(dots,"...............",nr_dots); | |
700 | + dots[nr_dots] = 0; | |
701 | + | |
702 | + dprintk("%s level %d index group block:\n", dots, level); | |
703 | + | |
704 | + | |
705 | + index = index_0; | |
706 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
707 | + if (index->data.phys_block) { | |
708 | + dprintk("%s block <%u,%u,%x>\n", dots, | |
709 | + index->data.phys_nr, | |
710 | + index->data.phys_block, | |
711 | + index->cpu_addr); | |
712 | + if (level && index_present(index)) { | |
713 | + dprintk("%s==> deeper one level\n", dots); | |
714 | + free_index_group(lv, level-1, | |
715 | + index_child(index)); | |
716 | + dprintk("%s freeing index group block %p ...", | |
717 | + dots, index_child(index)); | |
718 | + free_index(lv, index); | |
719 | + } | |
720 | + } | |
721 | + index++; | |
722 | + } | |
723 | + dprintk("%s DONE: level %d index group block.\n", dots, level); | |
724 | +} | |
725 | + | |
726 | +static void free_lv_indextree (lv_t *lv) | |
727 | +{ | |
728 | + dprintk("freeing LV %d ...\n", lv->log_id); | |
729 | + dprintk("..root index: %p\n", index_child(&lv->root_index)); | |
730 | + dprintk("..INDEX TREE:\n"); | |
731 | + free_index_group(lv, INDEX_DEPTH-1, index_child(&lv->root_index)); | |
732 | + dprintk("..freeing root index %p ...", index_child(&lv->root_index)); | |
733 | + dprintk("root block <%u,%u,%x>\n", lv->root_index.data.phys_nr, | |
734 | + lv->root_index.data.phys_block, lv->root_index.cpu_addr); | |
735 | + free_index(lv, &lv->root_index); | |
736 | + dprintk("..INDEX TREE done.\n"); | |
737 | + fsync_dev(lv->vg->pv_array[0].dev); /* fix me */ | |
738 | + lv->vg->vg_sb->lv_array[lv->log_id].lv_free_indices = lv->free_indices; | |
739 | +} | |
740 | + | |
741 | +static void print_index_group (lv_t *lv, int level, lv_lptr_t * index_0) | |
742 | +{ | |
743 | + char dots [3*5]; | |
744 | + lv_lptr_t * index; | |
745 | + int i, nr_dots; | |
746 | + | |
747 | + nr_dots = (INDEX_DEPTH-level)*3; | |
748 | + memcpy(dots,"...............",nr_dots); | |
749 | + dots[nr_dots] = 0; | |
750 | + | |
751 | + dprintk("%s level %d index group block:\n", dots, level); | |
752 | + | |
753 | + | |
754 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
755 | + index = index_0 + i; | |
756 | + if (index->data.phys_block) { | |
757 | + dprintk("%s block <%u,%u,%x>\n", dots, | |
758 | + index->data.phys_nr, | |
759 | + index->data.phys_block, | |
760 | + index->cpu_addr); | |
761 | + if (level && index_present(index)) { | |
762 | + dprintk("%s==> deeper one level\n", dots); | |
763 | + print_index_group(lv, level-1, | |
764 | + index_child(index)); | |
765 | + } | |
766 | + } | |
767 | + } | |
768 | + dprintk("%s DONE: level %d index group block.\n", dots, level); | |
769 | +} | |
770 | + | |
771 | +static void print_lv (lv_t *lv) | |
772 | +{ | |
773 | + dprintk("printing LV %d ...\n", lv->log_id); | |
774 | + dprintk("..root index: %p\n", index_child(&lv->root_index)); | |
775 | + dprintk("..INDEX TREE:\n"); | |
776 | + print_index_group(lv, INDEX_DEPTH-1, index_child(&lv->root_index)); | |
777 | + dprintk("..INDEX TREE done.\n"); | |
778 | +} | |
779 | + | |
780 | +static int map_lv (lv_t *lv) | |
781 | +{ | |
782 | + kdev_t dev = lv->dev; | |
783 | + unsigned int nr = MINOR(dev); | |
784 | + mddev_t *mddev = lv->vg->mddev; | |
785 | + | |
786 | + if (MAJOR(dev) != MD_MAJOR) { | |
787 | + MD_BUG(); | |
788 | + return -1; | |
789 | + } | |
790 | + if (kdev_to_mddev(dev)) { | |
791 | + MD_BUG(); | |
792 | + return -1; | |
793 | + } | |
794 | + md_hd_struct[nr].start_sect = 0; | |
795 | + md_hd_struct[nr].nr_sects = md_size[mdidx(mddev)] << 1; | |
796 | + md_size[nr] = md_size[mdidx(mddev)]; | |
797 | + add_mddev_mapping(mddev, dev, lv); | |
798 | + | |
799 | + return 0; | |
800 | +} | |
801 | + | |
802 | +static int unmap_lv (lv_t *lv) | |
803 | +{ | |
804 | + kdev_t dev = lv->dev; | |
805 | + unsigned int nr = MINOR(dev); | |
806 | + | |
807 | + if (MAJOR(dev) != MD_MAJOR) { | |
808 | + MD_BUG(); | |
809 | + return -1; | |
810 | + } | |
811 | + md_hd_struct[nr].start_sect = 0; | |
812 | + md_hd_struct[nr].nr_sects = 0; | |
813 | + md_size[nr] = 0; | |
814 | + del_mddev_mapping(lv->vg->mddev, dev); | |
815 | + | |
816 | + return 0; | |
817 | +} | |
818 | + | |
819 | +static int init_vg (vg_t *vg) | |
820 | +{ | |
821 | + int i; | |
822 | + lv_t *lv; | |
823 | + kdev_t dev; | |
824 | + vg_sb_t *vg_sb; | |
825 | + struct buffer_head *bh; | |
826 | + lv_descriptor_t *lv_desc; | |
827 | + | |
828 | + /* | |
829 | + * fix me: read all PVs and compare the SB | |
830 | + */ | |
831 | + dev = vg->pv_array[0].dev; | |
832 | + bh = bread (dev, 1, HSM_BLOCKSIZE); | |
833 | + if (!bh) { | |
834 | + MD_BUG(); | |
835 | + return -1; | |
836 | + } | |
837 | + | |
838 | + vg_sb = (vg_sb_t *) bh->b_data; | |
839 | + vg->vg_sb = vg_sb; | |
840 | + | |
841 | + if (vg_sb->vg_magic != HSM_VG_SB_MAGIC) { | |
842 | + printk("%s is not a valid VG, has magic %x instead of %x!\n", | |
843 | + partition_name(dev), vg_sb->vg_magic, | |
844 | + HSM_VG_SB_MAGIC); | |
845 | + return -1; | |
846 | + } | |
847 | + | |
848 | + vg->nr_lv = 0; | |
849 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
850 | + unsigned int id; | |
851 | + lv_desc = vg->vg_sb->lv_array + i; | |
852 | + | |
853 | + id = lv_desc->lv_id; | |
854 | + if (!id) { | |
855 | + printk("... LV desc %d empty\n", i); | |
856 | + continue; | |
857 | + } | |
858 | + if (id >= HSM_MAX_LVS_PER_VG) { | |
859 | + MD_BUG(); | |
860 | + continue; | |
861 | + } | |
862 | + | |
863 | + lv = vg->lv_array + id; | |
864 | + if (lv->vg) { | |
865 | + MD_BUG(); | |
866 | + continue; | |
867 | + } | |
868 | + lv->log_id = id; | |
869 | + lv->vg = vg; | |
870 | + lv->max_indices = lv_desc->lv_max_indices; | |
871 | + lv->free_indices = lv_desc->lv_free_indices; | |
872 | + lv->root_index.data = lv_desc->lv_root_idx; | |
873 | + lv->dev = MKDEV(MD_MAJOR, lv_desc->md_id); | |
874 | + | |
875 | + vg->nr_lv++; | |
876 | + | |
877 | + map_lv(lv); | |
878 | + if (read_root_index(lv)) { | |
879 | + vg->nr_lv--; | |
880 | + unmap_lv(lv); | |
881 | + memset(lv, 0, sizeof(*lv)); | |
882 | + } | |
883 | + } | |
884 | + if (vg->nr_lv != vg_sb->nr_lvs) | |
885 | + MD_BUG(); | |
886 | + | |
887 | + return 0; | |
888 | +} | |
889 | + | |
890 | +static int hsm_run (mddev_t *mddev) | |
891 | +{ | |
892 | + int i; | |
893 | + vg_t *vg; | |
894 | + mdk_rdev_t *rdev; | |
895 | + | |
896 | + MOD_INC_USE_COUNT; | |
897 | + | |
898 | + vg = kmalloc (sizeof (*vg), GFP_KERNEL); | |
899 | + if (!vg) | |
900 | + goto out; | |
901 | + memset(vg, 0, sizeof(*vg)); | |
902 | + mddev->private = vg; | |
903 | + vg->mddev = mddev; | |
904 | + | |
905 | + if (md_check_ordering(mddev)) { | |
906 | + printk("hsm: disks are not ordered, aborting!\n"); | |
907 | + goto out; | |
908 | + } | |
909 | + | |
910 | + set_blocksize (mddev_to_kdev(mddev), HSM_BLOCKSIZE); | |
911 | + | |
912 | + vg->nr_pv = mddev->nb_dev; | |
913 | + ITERATE_RDEV_ORDERED(mddev,rdev,i) { | |
914 | + pv_t *pv = vg->pv_array + i; | |
915 | + | |
916 | + pv->dev = rdev->dev; | |
917 | + fsync_dev (pv->dev); | |
918 | + set_blocksize (pv->dev, HSM_BLOCKSIZE); | |
919 | + pv->phys_nr = i; | |
920 | + if (init_pv(pv)) | |
921 | + goto out; | |
922 | + } | |
923 | + | |
924 | + init_vg(vg); | |
925 | + | |
926 | + return 0; | |
927 | + | |
928 | +out: | |
929 | + if (vg) { | |
930 | + kfree(vg); | |
931 | + mddev->private = NULL; | |
932 | + } | |
933 | + MOD_DEC_USE_COUNT; | |
934 | + | |
935 | + return 1; | |
936 | +} | |
937 | + | |
938 | +static int hsm_stop (mddev_t *mddev) | |
939 | +{ | |
940 | + lv_t *lv; | |
941 | + vg_t *vg; | |
942 | + int i; | |
943 | + | |
944 | + vg = mddev_to_vg(mddev); | |
945 | + | |
946 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
947 | + lv = vg->lv_array + i; | |
948 | + if (!lv->log_id) | |
949 | + continue; | |
950 | + print_lv(lv); | |
951 | + free_lv_indextree(lv); | |
952 | + unmap_lv(lv); | |
953 | + } | |
954 | + for (i = 0; i < vg->nr_pv; i++) | |
955 | + free_pv(vg->pv_array + i); | |
956 | + | |
957 | + kfree(vg); | |
958 | + | |
959 | + MOD_DEC_USE_COUNT; | |
960 | + | |
961 | + return 0; | |
962 | +} | |
963 | + | |
964 | + | |
965 | +static int hsm_status (char *page, mddev_t *mddev) | |
966 | +{ | |
967 | + int sz = 0, i; | |
968 | + lv_t *lv; | |
969 | + vg_t *vg; | |
970 | + | |
971 | + vg = mddev_to_vg(mddev); | |
972 | + | |
973 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
974 | + lv = vg->lv_array + i; | |
975 | + if (!lv->log_id) | |
976 | + continue; | |
977 | + sz += sprintf(page+sz, "<LV%d %d/%d blocks used> ", lv->log_id, | |
978 | + lv->max_indices - lv->free_indices, lv->max_indices); | |
979 | + } | |
980 | + return sz; | |
981 | +} | |
982 | + | |
983 | + | |
984 | +static mdk_personality_t hsm_personality= | |
985 | +{ | |
986 | + "hsm", | |
987 | + hsm_map, | |
988 | + NULL, | |
989 | + NULL, | |
990 | + hsm_run, | |
991 | + hsm_stop, | |
992 | + hsm_status, | |
993 | + NULL, | |
994 | + 0, | |
995 | + NULL, | |
996 | + NULL, | |
997 | + NULL, | |
998 | + NULL | |
999 | +}; | |
1000 | + | |
1001 | +#ifndef MODULE | |
1002 | + | |
1003 | +md__initfunc(void hsm_init (void)) | |
1004 | +{ | |
1005 | + register_md_personality (HSM, &hsm_personality); | |
1006 | +} | |
1007 | + | |
1008 | +#else | |
1009 | + | |
1010 | +int init_module (void) | |
1011 | +{ | |
1012 | + return (register_md_personality (HSM, &hsm_personality)); | |
1013 | +} | |
1014 | + | |
1015 | +void cleanup_module (void) | |
1016 | +{ | |
1017 | + unregister_md_personality (HSM); | |
1018 | +} | |
1019 | + | |
1020 | +#endif | |
1021 | + | |
1022 | +/* | |
1023 | + * This Linus-trick catches bugs via the linker. | |
1024 | + */ | |
1025 | + | |
1026 | +extern void __BUG__in__hsm_dot_c_1(void); | |
1027 | +extern void __BUG__in__hsm_dot_c_2(void); | |
1028 | +extern void __BUG__in__hsm_dot_c_3(void); | |
1029 | +extern void __BUG__in__hsm_dot_c_4(void); | |
1030 | +extern void __BUG__in__hsm_dot_c_5(void); | |
1031 | +extern void __BUG__in__hsm_dot_c_6(void); | |
1032 | +extern void __BUG__in__hsm_dot_c_7(void); | |
1033 | + | |
1034 | +void bugcatcher (void) | |
1035 | +{ | |
1036 | + if (sizeof(pv_block_group_t) != HSM_BLOCKSIZE) | |
1037 | + __BUG__in__hsm_dot_c_1(); | |
1038 | + if (sizeof(lv_index_block_t) != HSM_BLOCKSIZE) | |
1039 | + __BUG__in__hsm_dot_c_2(); | |
1040 | + | |
1041 | + if (sizeof(pv_sb_t) != HSM_BLOCKSIZE) | |
1042 | + __BUG__in__hsm_dot_c_4(); | |
1043 | + if (sizeof(lv_sb_t) != HSM_BLOCKSIZE) | |
1044 | + __BUG__in__hsm_dot_c_3(); | |
1045 | + if (sizeof(vg_sb_t) != HSM_BLOCKSIZE) | |
1046 | + __BUG__in__hsm_dot_c_6(); | |
1047 | + | |
1048 | + if (sizeof(lv_lptr_t) != 16) | |
1049 | + __BUG__in__hsm_dot_c_5(); | |
1050 | + if (sizeof(pv_pptr_t) != 16) | |
1051 | + __BUG__in__hsm_dot_c_6(); | |
1052 | +} | |
1053 | + | |
1054 | diff -ruN linux.orig/drivers/block/linear.c linux-2.2.16/drivers/block/linear.c | |
1055 | --- linux.orig/drivers/block/linear.c Sat Nov 8 20:39:12 1997 | |
1056 | +++ linux-2.2.16/drivers/block/linear.c Fri Jun 9 11:37:45 2000 | |
1057 | @@ -1,4 +1,3 @@ | |
1058 | - | |
1059 | /* | |
1060 | linear.c : Multiple Devices driver for Linux | |
1061 | Copyright (C) 1994-96 Marc ZYNGIER | |
1062 | @@ -19,186 +18,207 @@ | |
1063 | ||
1064 | #include <linux/module.h> | |
1065 | ||
1066 | -#include <linux/md.h> | |
1067 | +#include <linux/raid/md.h> | |
1068 | #include <linux/malloc.h> | |
1069 | -#include <linux/init.h> | |
1070 | ||
1071 | -#include "linear.h" | |
1072 | +#include <linux/raid/linear.h> | |
1073 | ||
1074 | #define MAJOR_NR MD_MAJOR | |
1075 | #define MD_DRIVER | |
1076 | #define MD_PERSONALITY | |
1077 | ||
1078 | -static int linear_run (int minor, struct md_dev *mddev) | |
1079 | +static int linear_run (mddev_t *mddev) | |
1080 | { | |
1081 | - int cur=0, i, size, dev0_size, nb_zone; | |
1082 | - struct linear_data *data; | |
1083 | - | |
1084 | - MOD_INC_USE_COUNT; | |
1085 | - | |
1086 | - mddev->private=kmalloc (sizeof (struct linear_data), GFP_KERNEL); | |
1087 | - data=(struct linear_data *) mddev->private; | |
1088 | - | |
1089 | - /* | |
1090 | - Find out the smallest device. This was previously done | |
1091 | - at registry time, but since it violates modularity, | |
1092 | - I moved it here... Any comment ? ;-) | |
1093 | - */ | |
1094 | - | |
1095 | - data->smallest=mddev->devices; | |
1096 | - for (i=1; i<mddev->nb_dev; i++) | |
1097 | - if (data->smallest->size > mddev->devices[i].size) | |
1098 | - data->smallest=mddev->devices+i; | |
1099 | - | |
1100 | - nb_zone=data->nr_zones= | |
1101 | - md_size[minor]/data->smallest->size + | |
1102 | - (md_size[minor]%data->smallest->size ? 1 : 0); | |
1103 | - | |
1104 | - data->hash_table=kmalloc (sizeof (struct linear_hash)*nb_zone, GFP_KERNEL); | |
1105 | - | |
1106 | - size=mddev->devices[cur].size; | |
1107 | + linear_conf_t *conf; | |
1108 | + struct linear_hash *table; | |
1109 | + mdk_rdev_t *rdev; | |
1110 | + int size, i, j, nb_zone; | |
1111 | + unsigned int curr_offset; | |
1112 | + | |
1113 | + MOD_INC_USE_COUNT; | |
1114 | + | |
1115 | + conf = kmalloc (sizeof (*conf), GFP_KERNEL); | |
1116 | + if (!conf) | |
1117 | + goto out; | |
1118 | + mddev->private = conf; | |
1119 | + | |
1120 | + if (md_check_ordering(mddev)) { | |
1121 | + printk("linear: disks are not ordered, aborting!\n"); | |
1122 | + goto out; | |
1123 | + } | |
1124 | + /* | |
1125 | + * Find the smallest device. | |
1126 | + */ | |
1127 | + | |
1128 | + conf->smallest = NULL; | |
1129 | + curr_offset = 0; | |
1130 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
1131 | + dev_info_t *disk = conf->disks + j; | |
1132 | + | |
1133 | + disk->dev = rdev->dev; | |
1134 | + disk->size = rdev->size; | |
1135 | + disk->offset = curr_offset; | |
1136 | + | |
1137 | + curr_offset += disk->size; | |
1138 | + | |
1139 | + if (!conf->smallest || (disk->size < conf->smallest->size)) | |
1140 | + conf->smallest = disk; | |
1141 | + } | |
1142 | + | |
1143 | + nb_zone = conf->nr_zones = | |
1144 | + md_size[mdidx(mddev)] / conf->smallest->size + | |
1145 | + ((md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0); | |
1146 | + | |
1147 | + conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone, | |
1148 | + GFP_KERNEL); | |
1149 | + if (!conf->hash_table) | |
1150 | + goto out; | |
1151 | + | |
1152 | + /* | |
1153 | + * Here we generate the linear hash table | |
1154 | + */ | |
1155 | + table = conf->hash_table; | |
1156 | + i = 0; | |
1157 | + size = 0; | |
1158 | + for (j = 0; j < mddev->nb_dev; j++) { | |
1159 | + dev_info_t *disk = conf->disks + j; | |
1160 | + | |
1161 | + if (size < 0) { | |
1162 | + table->dev1 = disk; | |
1163 | + table++; | |
1164 | + } | |
1165 | + size += disk->size; | |
1166 | + | |
1167 | + while (size) { | |
1168 | + table->dev0 = disk; | |
1169 | + size -= conf->smallest->size; | |
1170 | + if (size < 0) | |
1171 | + break; | |
1172 | + table->dev1 = NULL; | |
1173 | + table++; | |
1174 | + } | |
1175 | + } | |
1176 | + table->dev1 = NULL; | |
1177 | + | |
1178 | + return 0; | |
1179 | + | |
1180 | +out: | |
1181 | + if (conf) | |
1182 | + kfree(conf); | |
1183 | + MOD_DEC_USE_COUNT; | |
1184 | + return 1; | |
1185 | +} | |
1186 | + | |
1187 | +static int linear_stop (mddev_t *mddev) | |
1188 | +{ | |
1189 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
1190 | + | |
1191 | + kfree(conf->hash_table); | |
1192 | + kfree(conf); | |
1193 | ||
1194 | - i=0; | |
1195 | - while (cur<mddev->nb_dev) | |
1196 | - { | |
1197 | - data->hash_table[i].dev0=mddev->devices+cur; | |
1198 | + MOD_DEC_USE_COUNT; | |
1199 | ||
1200 | - if (size>=data->smallest->size) /* If we completely fill the slot */ | |
1201 | - { | |
1202 | - data->hash_table[i++].dev1=NULL; | |
1203 | - size-=data->smallest->size; | |
1204 | - | |
1205 | - if (!size) | |
1206 | - { | |
1207 | - if (++cur==mddev->nb_dev) continue; | |
1208 | - size=mddev->devices[cur].size; | |
1209 | - } | |
1210 | - | |
1211 | - continue; | |
1212 | - } | |
1213 | - | |
1214 | - if (++cur==mddev->nb_dev) /* Last dev, set dev1 as NULL */ | |
1215 | - { | |
1216 | - data->hash_table[i].dev1=NULL; | |
1217 | - continue; | |
1218 | - } | |
1219 | - | |
1220 | - dev0_size=size; /* Here, we use a 2nd dev to fill the slot */ | |
1221 | - size=mddev->devices[cur].size; | |
1222 | - data->hash_table[i++].dev1=mddev->devices+cur; | |
1223 | - size-=(data->smallest->size - dev0_size); | |
1224 | - } | |
1225 | - | |
1226 | - return 0; | |
1227 | -} | |
1228 | - | |
1229 | -static int linear_stop (int minor, struct md_dev *mddev) | |
1230 | -{ | |
1231 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
1232 | - | |
1233 | - kfree (data->hash_table); | |
1234 | - kfree (data); | |
1235 | - | |
1236 | - MOD_DEC_USE_COUNT; | |
1237 | - | |
1238 | - return 0; | |
1239 | + return 0; | |
1240 | } | |
1241 | ||
1242 | ||
1243 | -static int linear_map (struct md_dev *mddev, kdev_t *rdev, | |
1244 | +static int linear_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
1245 | unsigned long *rsector, unsigned long size) | |
1246 | { | |
1247 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
1248 | - struct linear_hash *hash; | |
1249 | - struct real_dev *tmp_dev; | |
1250 | - long block; | |
1251 | - | |
1252 | - block=*rsector >> 1; | |
1253 | - hash=data->hash_table+(block/data->smallest->size); | |
1254 | - | |
1255 | - if (block >= (hash->dev0->size + hash->dev0->offset)) | |
1256 | - { | |
1257 | - if (!hash->dev1) | |
1258 | - { | |
1259 | - printk ("linear_map : hash->dev1==NULL for block %ld\n", block); | |
1260 | - return (-1); | |
1261 | - } | |
1262 | - | |
1263 | - tmp_dev=hash->dev1; | |
1264 | - } | |
1265 | - else | |
1266 | - tmp_dev=hash->dev0; | |
1267 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
1268 | + struct linear_hash *hash; | |
1269 | + dev_info_t *tmp_dev; | |
1270 | + long block; | |
1271 | + | |
1272 | + block = *rsector >> 1; | |
1273 | + hash = conf->hash_table + (block / conf->smallest->size); | |
1274 | + | |
1275 | + if (block >= (hash->dev0->size + hash->dev0->offset)) | |
1276 | + { | |
1277 | + if (!hash->dev1) | |
1278 | + { | |
1279 | + printk ("linear_map : hash->dev1==NULL for block %ld\n", | |
1280 | + block); | |
1281 | + return -1; | |
1282 | + } | |
1283 | + tmp_dev = hash->dev1; | |
1284 | + } else | |
1285 | + tmp_dev = hash->dev0; | |
1286 | ||
1287 | - if (block >= (tmp_dev->size + tmp_dev->offset) || block < tmp_dev->offset) | |
1288 | - printk ("Block %ld out of bounds on dev %s size %d offset %d\n", | |
1289 | - block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset); | |
1290 | + if (block >= (tmp_dev->size + tmp_dev->offset) | |
1291 | + || block < tmp_dev->offset) | |
1292 | + printk ("Block %ld out of bounds on dev %s size %d offset %d\n", | |
1293 | + block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset); | |
1294 | ||
1295 | - *rdev=tmp_dev->dev; | |
1296 | - *rsector=(block-(tmp_dev->offset)) << 1; | |
1297 | + *rdev = tmp_dev->dev; | |
1298 | + *rsector = (block - tmp_dev->offset) << 1; | |
1299 | ||
1300 | - return (0); | |
1301 | + return 0; | |
1302 | } | |
1303 | ||
1304 | -static int linear_status (char *page, int minor, struct md_dev *mddev) | |
1305 | +static int linear_status (char *page, mddev_t *mddev) | |
1306 | { | |
1307 | - int sz=0; | |
1308 | + int sz=0; | |
1309 | ||
1310 | #undef MD_DEBUG | |
1311 | #ifdef MD_DEBUG | |
1312 | - int j; | |
1313 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
1314 | + int j; | |
1315 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
1316 | ||
1317 | - sz+=sprintf (page+sz, " "); | |
1318 | - for (j=0; j<data->nr_zones; j++) | |
1319 | - { | |
1320 | - sz+=sprintf (page+sz, "[%s", | |
1321 | - partition_name (data->hash_table[j].dev0->dev)); | |
1322 | - | |
1323 | - if (data->hash_table[j].dev1) | |
1324 | - sz+=sprintf (page+sz, "/%s] ", | |
1325 | - partition_name(data->hash_table[j].dev1->dev)); | |
1326 | - else | |
1327 | - sz+=sprintf (page+sz, "] "); | |
1328 | - } | |
1329 | - | |
1330 | - sz+=sprintf (page+sz, "\n"); | |
1331 | + sz += sprintf(page+sz, " "); | |
1332 | + for (j = 0; j < conf->nr_zones; j++) | |
1333 | + { | |
1334 | + sz += sprintf(page+sz, "[%s", | |
1335 | + partition_name(conf->hash_table[j].dev0->dev)); | |
1336 | + | |
1337 | + if (conf->hash_table[j].dev1) | |
1338 | + sz += sprintf(page+sz, "/%s] ", | |
1339 | + partition_name(conf->hash_table[j].dev1->dev)); | |
1340 | + else | |
1341 | + sz += sprintf(page+sz, "] "); | |
1342 | + } | |
1343 | + sz += sprintf(page+sz, "\n"); | |
1344 | #endif | |
1345 | - sz+=sprintf (page+sz, " %dk rounding", 1<<FACTOR_SHIFT(FACTOR(mddev))); | |
1346 | - return sz; | |
1347 | + sz += sprintf(page+sz, " %dk rounding", mddev->param.chunk_size/1024); | |
1348 | + return sz; | |
1349 | } | |
1350 | ||
1351 | ||
1352 | -static struct md_personality linear_personality= | |
1353 | +static mdk_personality_t linear_personality= | |
1354 | { | |
1355 | - "linear", | |
1356 | - linear_map, | |
1357 | - NULL, | |
1358 | - NULL, | |
1359 | - linear_run, | |
1360 | - linear_stop, | |
1361 | - linear_status, | |
1362 | - NULL, /* no ioctls */ | |
1363 | - 0 | |
1364 | + "linear", | |
1365 | + linear_map, | |
1366 | + NULL, | |
1367 | + NULL, | |
1368 | + linear_run, | |
1369 | + linear_stop, | |
1370 | + linear_status, | |
1371 | + NULL, | |
1372 | + 0, | |
1373 | + NULL, | |
1374 | + NULL, | |
1375 | + NULL, | |
1376 | + NULL | |
1377 | }; | |
1378 | ||
1379 | - | |
1380 | #ifndef MODULE | |
1381 | ||
1382 | -__initfunc(void linear_init (void)) | |
1383 | +md__initfunc(void linear_init (void)) | |
1384 | { | |
1385 | - register_md_personality (LINEAR, &linear_personality); | |
1386 | + register_md_personality (LINEAR, &linear_personality); | |
1387 | } | |
1388 | ||
1389 | #else | |
1390 | ||
1391 | int init_module (void) | |
1392 | { | |
1393 | - return (register_md_personality (LINEAR, &linear_personality)); | |
1394 | + return (register_md_personality (LINEAR, &linear_personality)); | |
1395 | } | |
1396 | ||
1397 | void cleanup_module (void) | |
1398 | { | |
1399 | - unregister_md_personality (LINEAR); | |
1400 | + unregister_md_personality (LINEAR); | |
1401 | } | |
1402 | ||
1403 | #endif | |
1404 | + | |
1405 | diff -ruN linux.orig/drivers/block/linear.h linux-2.2.16/drivers/block/linear.h | |
1406 | --- linux.orig/drivers/block/linear.h Fri Nov 22 15:07:23 1996 | |
1407 | +++ linux-2.2.16/drivers/block/linear.h Thu Jan 1 01:00:00 1970 | |
1408 | @@ -1,16 +0,0 @@ | |
1409 | -#ifndef _LINEAR_H | |
1410 | -#define _LINEAR_H | |
1411 | - | |
1412 | -struct linear_hash | |
1413 | -{ | |
1414 | - struct real_dev *dev0, *dev1; | |
1415 | -}; | |
1416 | - | |
1417 | -struct linear_data | |
1418 | -{ | |
1419 | - struct linear_hash *hash_table; /* Dynamically allocated */ | |
1420 | - struct real_dev *smallest; | |
1421 | - int nr_zones; | |
1422 | -}; | |
1423 | - | |
1424 | -#endif | |
1425 | diff -ruN linux.orig/drivers/block/ll_rw_blk.c linux-2.2.16/drivers/block/ll_rw_blk.c | |
1426 | --- linux.orig/drivers/block/ll_rw_blk.c Wed Jun 7 23:26:42 2000 | |
1427 | +++ linux-2.2.16/drivers/block/ll_rw_blk.c Fri Jun 9 11:37:45 2000 | |
1428 | @@ -23,6 +23,7 @@ | |
1429 | #include <asm/io.h> | |
1430 | #include <asm/uaccess.h> | |
1431 | #include <linux/blk.h> | |
1432 | +#include <linux/raid/md.h> | |
1433 | ||
1434 | #include <linux/module.h> | |
1435 | ||
1436 | @@ -53,6 +54,11 @@ | |
1437 | spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; | |
1438 | ||
1439 | /* | |
1440 | + * per-major idle-IO detection | |
1441 | + */ | |
1442 | +unsigned long io_events[MAX_BLKDEV] = {0, }; | |
1443 | + | |
1444 | +/* | |
1445 | * used to wait on when there are no free requests | |
1446 | */ | |
1447 | struct wait_queue * wait_for_request; | |
1448 | @@ -641,6 +647,8 @@ | |
1449 | return; | |
1450 | /* Maybe the above fixes it, and maybe it doesn't boot. Life is interesting */ | |
1451 | lock_buffer(bh); | |
1452 | + if (!buffer_lowprio(bh)) | |
1453 | + io_events[major]++; | |
1454 | ||
1455 | if (blk_size[major]) { | |
1456 | unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; | |
1457 | @@ -892,7 +900,7 @@ | |
1458 | bh[i]->b_rsector=bh[i]->b_blocknr*(bh[i]->b_size >> 9); | |
1459 | #ifdef CONFIG_BLK_DEV_MD | |
1460 | if (major==MD_MAJOR && | |
1461 | - md_map (MINOR(bh[i]->b_dev), &bh[i]->b_rdev, | |
1462 | + md_map (bh[i]->b_dev, &bh[i]->b_rdev, | |
1463 | &bh[i]->b_rsector, bh[i]->b_size >> 9)) { | |
1464 | printk (KERN_ERR | |
1465 | "Bad md_map in ll_rw_block\n"); | |
1466 | @@ -912,7 +920,7 @@ | |
1467 | set_bit(BH_Req, &bh[i]->b_state); | |
1468 | #ifdef CONFIG_BLK_DEV_MD | |
1469 | if (MAJOR(bh[i]->b_dev) == MD_MAJOR) { | |
1470 | - md_make_request(MINOR (bh[i]->b_dev), rw, bh[i]); | |
1471 | + md_make_request(bh[i], rw); | |
1472 | continue; | |
1473 | } | |
1474 | #endif | |
1475 | diff -ruN linux.orig/drivers/block/md.c linux-2.2.16/drivers/block/md.c | |
1476 | --- linux.orig/drivers/block/md.c Wed Jun 7 23:26:42 2000 | |
1477 | +++ linux-2.2.16/drivers/block/md.c Fri Jun 9 11:43:43 2000 | |
1478 | @@ -1,21 +1,17 @@ | |
1479 | - | |
1480 | /* | |
1481 | md.c : Multiple Devices driver for Linux | |
1482 | - Copyright (C) 1994-96 Marc ZYNGIER | |
1483 | - <zyngier@ufr-info-p7.ibp.fr> or | |
1484 | - <maz@gloups.fdn.fr> | |
1485 | + Copyright (C) 1998, 1999 Ingo Molnar | |
1486 | ||
1487 | - A lot of inspiration came from hd.c ... | |
1488 | + completely rewritten, based on the MD driver code from Marc Zyngier | |
1489 | ||
1490 | - kerneld support by Boris Tobotras <boris@xtalk.msk.su> | |
1491 | - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> | |
1492 | + Changes: | |
1493 | ||
1494 | - RAID-1/RAID-5 extensions by: | |
1495 | - Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
1496 | + - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar | |
1497 | + - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> | |
1498 | + - kerneld support by Boris Tobotras <boris@xtalk.msk.su> | |
1499 | + - kmod support by: Cyrus Durgin | |
1500 | + - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> | |
1501 | ||
1502 | - Changes for kmod by: | |
1503 | - Cyrus Durgin | |
1504 | - | |
1505 | This program is free software; you can redistribute it and/or modify | |
1506 | it under the terms of the GNU General Public License as published by | |
1507 | the Free Software Foundation; either version 2, or (at your option) | |
1508 | @@ -26,809 +22,3007 @@ | |
1509 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
1510 | */ | |
1511 | ||
1512 | -/* | |
1513 | - * Current RAID-1,4,5 parallel reconstruction speed limit is 1024 KB/sec, so | |
1514 | - * the extra system load does not show up that much. Increase it if your | |
1515 | - * system can take more. | |
1516 | - */ | |
1517 | -#define SPEED_LIMIT 1024 | |
1518 | +#include <linux/raid/md.h> | |
1519 | +#include <linux/raid/xor.h> | |
1520 | ||
1521 | -#include <linux/config.h> | |
1522 | -#include <linux/module.h> | |
1523 | -#include <linux/version.h> | |
1524 | -#include <linux/malloc.h> | |
1525 | -#include <linux/mm.h> | |
1526 | -#include <linux/md.h> | |
1527 | -#include <linux/hdreg.h> | |
1528 | -#include <linux/stat.h> | |
1529 | -#include <linux/fs.h> | |
1530 | -#include <linux/proc_fs.h> | |
1531 | -#include <linux/blkdev.h> | |
1532 | -#include <linux/genhd.h> | |
1533 | -#include <linux/smp_lock.h> | |
1534 | #ifdef CONFIG_KMOD | |
1535 | #include <linux/kmod.h> | |
1536 | #endif | |
1537 | -#include <linux/errno.h> | |
1538 | -#include <linux/init.h> | |
1539 | ||
1540 | #define __KERNEL_SYSCALLS__ | |
1541 | #include <linux/unistd.h> | |
1542 | ||
1543 | +#include <asm/unaligned.h> | |
1544 | + | |
1545 | +extern asmlinkage int sys_sched_yield(void); | |
1546 | +extern asmlinkage int sys_setsid(void); | |
1547 | + | |
1548 | +extern unsigned long io_events[MAX_BLKDEV]; | |
1549 | + | |
1550 | #define MAJOR_NR MD_MAJOR | |
1551 | #define MD_DRIVER | |
1552 | ||
1553 | #include <linux/blk.h> | |
1554 | -#include <asm/uaccess.h> | |
1555 | -#include <asm/bitops.h> | |
1556 | -#include <asm/atomic.h> | |
1557 | ||
1558 | #ifdef CONFIG_MD_BOOT | |
1559 | -extern kdev_t name_to_kdev_t(char *line) __init; | |
1560 | +extern kdev_t name_to_kdev_t(char *line) md__init; | |
1561 | #endif | |
1562 | ||
1563 | -static struct hd_struct md_hd_struct[MAX_MD_DEV]; | |
1564 | -static int md_blocksizes[MAX_MD_DEV]; | |
1565 | -int md_maxreadahead[MAX_MD_DEV]; | |
1566 | -#if SUPPORT_RECONSTRUCTION | |
1567 | -static struct md_thread *md_sync_thread = NULL; | |
1568 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
1569 | +static mdk_personality_t *pers[MAX_PERSONALITY] = {NULL, }; | |
1570 | + | |
1571 | +/* | |
1572 | + * these have to be allocated separately because external | |
1573 | + * subsystems want to have a pre-defined structure | |
1574 | + */ | |
1575 | +struct hd_struct md_hd_struct[MAX_MD_DEVS]; | |
1576 | +static int md_blocksizes[MAX_MD_DEVS]; | |
1577 | +static int md_maxreadahead[MAX_MD_DEVS]; | |
1578 | +static mdk_thread_t *md_recovery_thread = NULL; | |
1579 | ||
1580 | -int md_size[MAX_MD_DEV]={0, }; | |
1581 | +int md_size[MAX_MD_DEVS] = {0, }; | |
1582 | ||
1583 | static void md_geninit (struct gendisk *); | |
1584 | ||
1585 | static struct gendisk md_gendisk= | |
1586 | { | |
1587 | - MD_MAJOR, | |
1588 | - "md", | |
1589 | - 0, | |
1590 | - 1, | |
1591 | - MAX_MD_DEV, | |
1592 | - md_geninit, | |
1593 | - md_hd_struct, | |
1594 | - md_size, | |
1595 | - MAX_MD_DEV, | |
1596 | - NULL, | |
1597 | - NULL | |
1598 | + MD_MAJOR, | |
1599 | + "md", | |
1600 | + 0, | |
1601 | + 1, | |
1602 | + MAX_MD_DEVS, | |
1603 | + md_geninit, | |
1604 | + md_hd_struct, | |
1605 | + md_size, | |
1606 | + MAX_MD_DEVS, | |
1607 | + NULL, | |
1608 | + NULL | |
1609 | }; | |
1610 | ||
1611 | -static struct md_personality *pers[MAX_PERSONALITY]={NULL, }; | |
1612 | -struct md_dev md_dev[MAX_MD_DEV]; | |
1613 | - | |
1614 | -int md_thread(void * arg); | |
1615 | +/* | |
1616 | + * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | |
1617 | + * is 100 KB/sec, so the extra system load does not show up that much. | |
1618 | + * Increase it if you want to have more _guaranteed_ speed. Note that | |
1619 | + * the RAID driver will use the maximum available bandwith if the IO | |
1620 | + * subsystem is idle. | |
1621 | + * | |
1622 | + * you can change it via /proc/sys/dev/speed-limit | |
1623 | + */ | |
1624 | ||
1625 | -static struct gendisk *find_gendisk (kdev_t dev) | |
1626 | -{ | |
1627 | - struct gendisk *tmp=gendisk_head; | |
1628 | +static int sysctl_speed_limit = 100; | |
1629 | ||
1630 | - while (tmp != NULL) | |
1631 | - { | |
1632 | - if (tmp->major==MAJOR(dev)) | |
1633 | - return (tmp); | |
1634 | - | |
1635 | - tmp=tmp->next; | |
1636 | - } | |
1637 | +static struct ctl_table_header *md_table_header; | |
1638 | ||
1639 | - return (NULL); | |
1640 | -} | |
1641 | +static ctl_table md_table[] = { | |
1642 | + {DEV_MD_SPEED_LIMIT, "speed-limit", | |
1643 | + &sysctl_speed_limit, sizeof(int), 0644, NULL, &proc_dointvec}, | |
1644 | + {0} | |
1645 | +}; | |
1646 | ||
1647 | -char *partition_name (kdev_t dev) | |
1648 | -{ | |
1649 | - static char name[40]; /* This should be long | |
1650 | - enough for a device name ! */ | |
1651 | - struct gendisk *hd = find_gendisk (dev); | |
1652 | +static ctl_table md_dir_table[] = { | |
1653 | + {DEV_MD, "md", NULL, 0, 0555, md_table}, | |
1654 | + {0} | |
1655 | +}; | |
1656 | ||
1657 | - if (!hd) | |
1658 | - { | |
1659 | - sprintf (name, "[dev %s]", kdevname(dev)); | |
1660 | - return (name); | |
1661 | - } | |
1662 | +static ctl_table md_root_table[] = { | |
1663 | + {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table}, | |
1664 | + {0} | |
1665 | +}; | |
1666 | ||
1667 | - return disk_name (hd, MINOR(dev), name); /* routine in genhd.c */ | |
1668 | +static void md_register_sysctl(void) | |
1669 | +{ | |
1670 | + md_table_header = register_sysctl_table(md_root_table, 1); | |
1671 | } | |
1672 | ||
1673 | -static int legacy_raid_sb (int minor, int pnum) | |
1674 | +void md_unregister_sysctl(void) | |
1675 | { | |
1676 | - int i, factor; | |
1677 | + unregister_sysctl_table(md_table_header); | |
1678 | +} | |
1679 | + | |
1680 | +/* | |
1681 | + * The mapping between kdev and mddev is not necessary a simple | |
1682 | + * one! Eg. HSM uses several sub-devices to implement Logical | |
1683 | + * Volumes. All these sub-devices map to the same mddev. | |
1684 | + */ | |
1685 | +dev_mapping_t mddev_map [MAX_MD_DEVS] = { {NULL, 0}, }; | |
1686 | ||
1687 | - factor = 1 << FACTOR_SHIFT(FACTOR((md_dev+minor))); | |
1688 | +void add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data) | |
1689 | +{ | |
1690 | + unsigned int minor = MINOR(dev); | |
1691 | ||
1692 | - /***** | |
1693 | - * do size and offset calculations. | |
1694 | - */ | |
1695 | - for (i=0; i<md_dev[minor].nb_dev; i++) { | |
1696 | - md_dev[minor].devices[i].size &= ~(factor - 1); | |
1697 | - md_size[minor] += md_dev[minor].devices[i].size; | |
1698 | - md_dev[minor].devices[i].offset=i ? (md_dev[minor].devices[i-1].offset + | |
1699 | - md_dev[minor].devices[i-1].size) : 0; | |
1700 | + if (MAJOR(dev) != MD_MAJOR) { | |
1701 | + MD_BUG(); | |
1702 | + return; | |
1703 | } | |
1704 | - if (pnum == RAID0 >> PERSONALITY_SHIFT) | |
1705 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD * md_dev[minor].nb_dev; | |
1706 | - return 0; | |
1707 | + if (mddev_map[minor].mddev != NULL) { | |
1708 | + MD_BUG(); | |
1709 | + return; | |
1710 | + } | |
1711 | + mddev_map[minor].mddev = mddev; | |
1712 | + mddev_map[minor].data = data; | |
1713 | } | |
1714 | ||
1715 | -static void free_sb (struct md_dev *mddev) | |
1716 | +void del_mddev_mapping (mddev_t * mddev, kdev_t dev) | |
1717 | { | |
1718 | - int i; | |
1719 | - struct real_dev *realdev; | |
1720 | + unsigned int minor = MINOR(dev); | |
1721 | ||
1722 | - if (mddev->sb) { | |
1723 | - free_page((unsigned long) mddev->sb); | |
1724 | - mddev->sb = NULL; | |
1725 | + if (MAJOR(dev) != MD_MAJOR) { | |
1726 | + MD_BUG(); | |
1727 | + return; | |
1728 | } | |
1729 | - for (i = 0; i <mddev->nb_dev; i++) { | |
1730 | - realdev = mddev->devices + i; | |
1731 | - if (realdev->sb) { | |
1732 | - free_page((unsigned long) realdev->sb); | |
1733 | - realdev->sb = NULL; | |
1734 | - } | |
1735 | + if (mddev_map[minor].mddev != mddev) { | |
1736 | + MD_BUG(); | |
1737 | + return; | |
1738 | } | |
1739 | + mddev_map[minor].mddev = NULL; | |
1740 | + mddev_map[minor].data = NULL; | |
1741 | } | |
1742 | ||
1743 | /* | |
1744 | - * Check one RAID superblock for generic plausibility | |
1745 | + * Enables to iterate over all existing md arrays | |
1746 | */ | |
1747 | +static MD_LIST_HEAD(all_mddevs); | |
1748 | ||
1749 | -#define BAD_MAGIC KERN_ERR \ | |
1750 | -"md: %s: invalid raid superblock magic (%x) on block %u\n" | |
1751 | +static mddev_t * alloc_mddev (kdev_t dev) | |
1752 | +{ | |
1753 | + mddev_t * mddev; | |
1754 | ||
1755 | -#define OUT_OF_MEM KERN_ALERT \ | |
1756 | -"md: out of memory.\n" | |
1757 | + if (MAJOR(dev) != MD_MAJOR) { | |
1758 | + MD_BUG(); | |
1759 | + return 0; | |
1760 | + } | |
1761 | + mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); | |
1762 | + if (!mddev) | |
1763 | + return NULL; | |
1764 | + | |
1765 | + memset(mddev, 0, sizeof(*mddev)); | |
1766 | ||
1767 | -#define NO_DEVICE KERN_ERR \ | |
1768 | -"md: disabled device %s\n" | |
1769 | + mddev->__minor = MINOR(dev); | |
1770 | + mddev->reconfig_sem = MUTEX; | |
1771 | + mddev->recovery_sem = MUTEX; | |
1772 | + mddev->resync_sem = MUTEX; | |
1773 | + MD_INIT_LIST_HEAD(&mddev->disks); | |
1774 | + /* | |
1775 | + * The 'base' mddev is the one with data NULL. | |
1776 | + * personalities can create additional mddevs | |
1777 | + * if necessary. | |
1778 | + */ | |
1779 | + add_mddev_mapping(mddev, dev, 0); | |
1780 | + md_list_add(&mddev->all_mddevs, &all_mddevs); | |
1781 | ||
1782 | -#define SUCCESS 0 | |
1783 | -#define FAILURE -1 | |
1784 | + return mddev; | |
1785 | +} | |
1786 | ||
1787 | -static int analyze_one_sb (struct real_dev * rdev) | |
1788 | +static void free_mddev (mddev_t *mddev) | |
1789 | { | |
1790 | - int ret = FAILURE; | |
1791 | - struct buffer_head *bh; | |
1792 | - kdev_t dev = rdev->dev; | |
1793 | - md_superblock_t *sb; | |
1794 | + if (!mddev) { | |
1795 | + MD_BUG(); | |
1796 | + return; | |
1797 | + } | |
1798 | ||
1799 | /* | |
1800 | - * Read the superblock, it's at the end of the disk | |
1801 | + * Make sure nobody else is using this mddev | |
1802 | + * (careful, we rely on the global kernel lock here) | |
1803 | */ | |
1804 | - rdev->sb_offset = MD_NEW_SIZE_BLOCKS (blk_size[MAJOR(dev)][MINOR(dev)]); | |
1805 | - set_blocksize (dev, MD_SB_BYTES); | |
1806 | - bh = bread (dev, rdev->sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
1807 | - | |
1808 | - if (bh) { | |
1809 | - sb = (md_superblock_t *) bh->b_data; | |
1810 | - if (sb->md_magic != MD_SB_MAGIC) { | |
1811 | - printk (BAD_MAGIC, kdevname(dev), | |
1812 | - sb->md_magic, rdev->sb_offset); | |
1813 | - goto abort; | |
1814 | - } | |
1815 | - rdev->sb = (md_superblock_t *) __get_free_page(GFP_KERNEL); | |
1816 | - if (!rdev->sb) { | |
1817 | - printk (OUT_OF_MEM); | |
1818 | - goto abort; | |
1819 | - } | |
1820 | - memcpy (rdev->sb, bh->b_data, MD_SB_BYTES); | |
1821 | + while (md_atomic_read(&mddev->resync_sem.count) != 1) | |
1822 | + schedule(); | |
1823 | + while (md_atomic_read(&mddev->recovery_sem.count) != 1) | |
1824 | + schedule(); | |
1825 | ||
1826 | - rdev->size = sb->size; | |
1827 | - } else | |
1828 | - printk (NO_DEVICE,kdevname(rdev->dev)); | |
1829 | - ret = SUCCESS; | |
1830 | -abort: | |
1831 | - if (bh) | |
1832 | - brelse (bh); | |
1833 | - return ret; | |
1834 | + del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev))); | |
1835 | + md_list_del(&mddev->all_mddevs); | |
1836 | + MD_INIT_LIST_HEAD(&mddev->all_mddevs); | |
1837 | + kfree(mddev); | |
1838 | } | |
1839 | ||
1840 | -#undef SUCCESS | |
1841 | -#undef FAILURE | |
1842 | - | |
1843 | -#undef BAD_MAGIC | |
1844 | -#undef OUT_OF_MEM | |
1845 | -#undef NO_DEVICE | |
1846 | ||
1847 | -/* | |
1848 | - * Check a full RAID array for plausibility | |
1849 | - */ | |
1850 | +struct gendisk * find_gendisk (kdev_t dev) | |
1851 | +{ | |
1852 | + struct gendisk *tmp = gendisk_head; | |
1853 | ||
1854 | -#define INCONSISTENT KERN_ERR \ | |
1855 | -"md: superblock inconsistency -- run ckraid\n" | |
1856 | + while (tmp != NULL) { | |
1857 | + if (tmp->major == MAJOR(dev)) | |
1858 | + return (tmp); | |
1859 | + tmp = tmp->next; | |
1860 | + } | |
1861 | + return (NULL); | |
1862 | +} | |
1863 | ||
1864 | -#define OUT_OF_DATE KERN_ERR \ | |
1865 | -"md: superblock update time inconsistenty -- using the most recent one\n" | |
1866 | +mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) | |
1867 | +{ | |
1868 | + mdk_rdev_t * rdev; | |
1869 | + struct md_list_head *tmp; | |
1870 | ||
1871 | -#define OLD_VERSION KERN_ALERT \ | |
1872 | -"md: %s: unsupported raid array version %d.%d.%d\n" | |
1873 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
1874 | + if (rdev->desc_nr == nr) | |
1875 | + return rdev; | |
1876 | + } | |
1877 | + return NULL; | |
1878 | +} | |
1879 | ||
1880 | -#define NOT_CLEAN KERN_ERR \ | |
1881 | -"md: %s: raid array is not clean -- run ckraid\n" | |
1882 | +mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev) | |
1883 | +{ | |
1884 | + struct md_list_head *tmp; | |
1885 | + mdk_rdev_t *rdev; | |
1886 | ||
1887 | -#define NOT_CLEAN_IGNORE KERN_ERR \ | |
1888 | -"md: %s: raid array is not clean -- reconstructing parity\n" | |
1889 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
1890 | + if (rdev->dev == dev) | |
1891 | + return rdev; | |
1892 | + } | |
1893 | + return NULL; | |
1894 | +} | |
1895 | ||
1896 | -#define UNKNOWN_LEVEL KERN_ERR \ | |
1897 | -"md: %s: unsupported raid level %d\n" | |
1898 | +static MD_LIST_HEAD(device_names); | |
1899 | ||
1900 | -static int analyze_sbs (int minor, int pnum) | |
1901 | +char * partition_name (kdev_t dev) | |
1902 | { | |
1903 | - struct md_dev *mddev = md_dev + minor; | |
1904 | - int i, N = mddev->nb_dev, out_of_date = 0; | |
1905 | - struct real_dev * disks = mddev->devices; | |
1906 | - md_superblock_t *sb, *freshest = NULL; | |
1907 | + struct gendisk *hd; | |
1908 | + static char nomem [] = "<nomem>"; | |
1909 | + dev_name_t *dname; | |
1910 | + struct md_list_head *tmp = device_names.next; | |
1911 | ||
1912 | - /* | |
1913 | - * RAID-0 and linear don't use a RAID superblock | |
1914 | - */ | |
1915 | - if (pnum == RAID0 >> PERSONALITY_SHIFT || | |
1916 | - pnum == LINEAR >> PERSONALITY_SHIFT) | |
1917 | - return legacy_raid_sb (minor, pnum); | |
1918 | + while (tmp != &device_names) { | |
1919 | + dname = md_list_entry(tmp, dev_name_t, list); | |
1920 | + if (dname->dev == dev) | |
1921 | + return dname->name; | |
1922 | + tmp = tmp->next; | |
1923 | + } | |
1924 | + | |
1925 | + dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); | |
1926 | ||
1927 | + if (!dname) | |
1928 | + return nomem; | |
1929 | /* | |
1930 | - * Verify the RAID superblock on each real device | |
1931 | + * ok, add this new device name to the list | |
1932 | */ | |
1933 | - for (i = 0; i < N; i++) | |
1934 | - if (analyze_one_sb(disks+i)) | |
1935 | - goto abort; | |
1936 | + hd = find_gendisk (dev); | |
1937 | + | |
1938 | + if (!hd) | |
1939 | + sprintf (dname->name, "[dev %s]", kdevname(dev)); | |
1940 | + else | |
1941 | + disk_name (hd, MINOR(dev), dname->name); | |
1942 | + | |
1943 | + dname->dev = dev; | |
1944 | + md_list_add(&dname->list, &device_names); | |
1945 | + | |
1946 | + return dname->name; | |
1947 | +} | |
1948 | + | |
1949 | +static unsigned int calc_dev_sboffset (kdev_t dev, mddev_t *mddev, | |
1950 | + int persistent) | |
1951 | +{ | |
1952 | + unsigned int size = 0; | |
1953 | + | |
1954 | + if (blk_size[MAJOR(dev)]) | |
1955 | + size = blk_size[MAJOR(dev)][MINOR(dev)]; | |
1956 | + if (persistent) | |
1957 | + size = MD_NEW_SIZE_BLOCKS(size); | |
1958 | + return size; | |
1959 | +} | |
1960 | + | |
1961 | +static unsigned int calc_dev_size (kdev_t dev, mddev_t *mddev, int persistent) | |
1962 | +{ | |
1963 | + unsigned int size; | |
1964 | + | |
1965 | + size = calc_dev_sboffset(dev, mddev, persistent); | |
1966 | + if (!mddev->sb) { | |
1967 | + MD_BUG(); | |
1968 | + return size; | |
1969 | + } | |
1970 | + if (mddev->sb->chunk_size) | |
1971 | + size &= ~(mddev->sb->chunk_size/1024 - 1); | |
1972 | + return size; | |
1973 | +} | |
1974 | + | |
1975 | +/* | |
1976 | + * We check wether all devices are numbered from 0 to nb_dev-1. The | |
1977 | + * order is guaranteed even after device name changes. | |
1978 | + * | |
1979 | + * Some personalities (raid0, linear) use this. Personalities that | |
1980 | + * provide data have to be able to deal with loss of individual | |
1981 | + * disks, so they do their checking themselves. | |
1982 | + */ | |
1983 | +int md_check_ordering (mddev_t *mddev) | |
1984 | +{ | |
1985 | + int i, c; | |
1986 | + mdk_rdev_t *rdev; | |
1987 | + struct md_list_head *tmp; | |
1988 | ||
1989 | /* | |
1990 | - * The superblock constant part has to be the same | |
1991 | - * for all disks in the array. | |
1992 | + * First, all devices must be fully functional | |
1993 | */ | |
1994 | - sb = NULL; | |
1995 | - for (i = 0; i < N; i++) { | |
1996 | - if (!disks[i].sb) | |
1997 | - continue; | |
1998 | - if (!sb) { | |
1999 | - sb = disks[i].sb; | |
2000 | - continue; | |
2001 | - } | |
2002 | - if (memcmp(sb, | |
2003 | - disks[i].sb, MD_SB_GENERIC_CONSTANT_WORDS * 4)) { | |
2004 | - printk (INCONSISTENT); | |
2005 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2006 | + if (rdev->faulty) { | |
2007 | + printk("md: md%d's device %s faulty, aborting.\n", | |
2008 | + mdidx(mddev), partition_name(rdev->dev)); | |
2009 | goto abort; | |
2010 | } | |
2011 | } | |
2012 | ||
2013 | - /* | |
2014 | - * OK, we have all disks and the array is ready to run. Let's | |
2015 | - * find the freshest superblock, that one will be the superblock | |
2016 | - * that represents the whole array. | |
2017 | - */ | |
2018 | - if ((sb = mddev->sb = (md_superblock_t *) __get_free_page (GFP_KERNEL)) == NULL) | |
2019 | + c = 0; | |
2020 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2021 | + c++; | |
2022 | + } | |
2023 | + if (c != mddev->nb_dev) { | |
2024 | + MD_BUG(); | |
2025 | goto abort; | |
2026 | - freshest = NULL; | |
2027 | - for (i = 0; i < N; i++) { | |
2028 | - if (!disks[i].sb) | |
2029 | - continue; | |
2030 | - if (!freshest) { | |
2031 | - freshest = disks[i].sb; | |
2032 | - continue; | |
2033 | - } | |
2034 | - /* | |
2035 | - * Find the newest superblock version | |
2036 | - */ | |
2037 | - if (disks[i].sb->utime != freshest->utime) { | |
2038 | - out_of_date = 1; | |
2039 | - if (disks[i].sb->utime > freshest->utime) | |
2040 | - freshest = disks[i].sb; | |
2041 | - } | |
2042 | } | |
2043 | - if (out_of_date) | |
2044 | - printk(OUT_OF_DATE); | |
2045 | - memcpy (sb, freshest, sizeof(*freshest)); | |
2046 | - | |
2047 | - /* | |
2048 | - * Check if we can support this RAID array | |
2049 | - */ | |
2050 | - if (sb->major_version != MD_MAJOR_VERSION || | |
2051 | - sb->minor_version > MD_MINOR_VERSION) { | |
2052 | - | |
2053 | - printk (OLD_VERSION, kdevname(MKDEV(MD_MAJOR, minor)), | |
2054 | - sb->major_version, sb->minor_version, | |
2055 | - sb->patch_version); | |
2056 | + if (mddev->nb_dev != mddev->sb->raid_disks) { | |
2057 | + printk("md: md%d, array needs %d disks, has %d, aborting.\n", | |
2058 | + mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev); | |
2059 | goto abort; | |
2060 | } | |
2061 | - | |
2062 | /* | |
2063 | - * We need to add this as a superblock option. | |
2064 | + * Now the numbering check | |
2065 | */ | |
2066 | -#if SUPPORT_RECONSTRUCTION | |
2067 | - if (sb->state != (1 << MD_SB_CLEAN)) { | |
2068 | - if (sb->level == 1) { | |
2069 | - printk (NOT_CLEAN, kdevname(MKDEV(MD_MAJOR, minor))); | |
2070 | + for (i = 0; i < mddev->nb_dev; i++) { | |
2071 | + c = 0; | |
2072 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2073 | + if (rdev->desc_nr == i) | |
2074 | + c++; | |
2075 | + } | |
2076 | + if (c == 0) { | |
2077 | + printk("md: md%d, missing disk #%d, aborting.\n", | |
2078 | + mdidx(mddev), i); | |
2079 | goto abort; | |
2080 | - } else | |
2081 | - printk (NOT_CLEAN_IGNORE, kdevname(MKDEV(MD_MAJOR, minor))); | |
2082 | - } | |
2083 | -#else | |
2084 | - if (sb->state != (1 << MD_SB_CLEAN)) { | |
2085 | - printk (NOT_CLEAN, kdevname(MKDEV(MD_MAJOR, minor))); | |
2086 | - goto abort; | |
2087 | - } | |
2088 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
2089 | - | |
2090 | - switch (sb->level) { | |
2091 | - case 1: | |
2092 | - md_size[minor] = sb->size; | |
2093 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD; | |
2094 | - break; | |
2095 | - case 4: | |
2096 | - case 5: | |
2097 | - md_size[minor] = sb->size * (sb->raid_disks - 1); | |
2098 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD * (sb->raid_disks - 1); | |
2099 | - break; | |
2100 | - default: | |
2101 | - printk (UNKNOWN_LEVEL, kdevname(MKDEV(MD_MAJOR, minor)), | |
2102 | - sb->level); | |
2103 | + } | |
2104 | + if (c > 1) { | |
2105 | + printk("md: md%d, too many disks #%d, aborting.\n", | |
2106 | + mdidx(mddev), i); | |
2107 | goto abort; | |
2108 | + } | |
2109 | } | |
2110 | return 0; | |
2111 | abort: | |
2112 | - free_sb(mddev); | |
2113 | return 1; | |
2114 | } | |
2115 | ||
2116 | -#undef INCONSISTENT | |
2117 | -#undef OUT_OF_DATE | |
2118 | -#undef OLD_VERSION | |
2119 | -#undef NOT_CLEAN | |
2120 | -#undef OLD_LEVEL | |
2121 | - | |
2122 | -int md_update_sb(int minor) | |
2123 | +static unsigned int zoned_raid_size (mddev_t *mddev) | |
2124 | { | |
2125 | - struct md_dev *mddev = md_dev + minor; | |
2126 | - struct buffer_head *bh; | |
2127 | - md_superblock_t *sb = mddev->sb; | |
2128 | - struct real_dev *realdev; | |
2129 | - kdev_t dev; | |
2130 | - int i; | |
2131 | - u32 sb_offset; | |
2132 | + unsigned int mask; | |
2133 | + mdk_rdev_t * rdev; | |
2134 | + struct md_list_head *tmp; | |
2135 | ||
2136 | - sb->utime = CURRENT_TIME; | |
2137 | - for (i = 0; i < mddev->nb_dev; i++) { | |
2138 | - realdev = mddev->devices + i; | |
2139 | - if (!realdev->sb) | |
2140 | - continue; | |
2141 | - dev = realdev->dev; | |
2142 | - sb_offset = realdev->sb_offset; | |
2143 | - set_blocksize(dev, MD_SB_BYTES); | |
2144 | - printk("md: updating raid superblock on device %s, sb_offset == %u\n", kdevname(dev), sb_offset); | |
2145 | - bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
2146 | - if (bh) { | |
2147 | - sb = (md_superblock_t *) bh->b_data; | |
2148 | - memcpy(sb, mddev->sb, MD_SB_BYTES); | |
2149 | - memcpy(&sb->descriptor, sb->disks + realdev->sb->descriptor.number, MD_SB_DESCRIPTOR_WORDS * 4); | |
2150 | - mark_buffer_uptodate(bh, 1); | |
2151 | - mark_buffer_dirty(bh, 1); | |
2152 | - ll_rw_block(WRITE, 1, &bh); | |
2153 | - wait_on_buffer(bh); | |
2154 | - bforget(bh); | |
2155 | - fsync_dev(dev); | |
2156 | - invalidate_buffers(dev); | |
2157 | - } else | |
2158 | - printk(KERN_ERR "md: getblk failed for device %s\n", kdevname(dev)); | |
2159 | + if (!mddev->sb) { | |
2160 | + MD_BUG(); | |
2161 | + return -EINVAL; | |
2162 | + } | |
2163 | + /* | |
2164 | + * do size and offset calculations. | |
2165 | + */ | |
2166 | + mask = ~(mddev->sb->chunk_size/1024 - 1); | |
2167 | +printk("mask %08x\n", mask); | |
2168 | + | |
2169 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2170 | +printk(" rdev->size: %d\n", rdev->size); | |
2171 | + rdev->size &= mask; | |
2172 | +printk(" masked rdev->size: %d\n", rdev->size); | |
2173 | + md_size[mdidx(mddev)] += rdev->size; | |
2174 | +printk(" new md_size: %d\n", md_size[mdidx(mddev)]); | |
2175 | } | |
2176 | return 0; | |
2177 | } | |
2178 | ||
2179 | -static int do_md_run (int minor, int repart) | |
2180 | +static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb) | |
2181 | { | |
2182 | - int pnum, i, min, factor, err; | |
2183 | + if (disk_active(disk)) { | |
2184 | + sb->working_disks--; | |
2185 | + } else { | |
2186 | + if (disk_spare(disk)) { | |
2187 | + sb->spare_disks--; | |
2188 | + sb->working_disks--; | |
2189 | + } else { | |
2190 | + sb->failed_disks--; | |
2191 | + } | |
2192 | + } | |
2193 | + sb->nr_disks--; | |
2194 | + disk->major = 0; | |
2195 | + disk->minor = 0; | |
2196 | + mark_disk_removed(disk); | |
2197 | +} | |
2198 | ||
2199 | - if (!md_dev[minor].nb_dev) | |
2200 | - return -EINVAL; | |
2201 | - | |
2202 | - if (md_dev[minor].pers) | |
2203 | - return -EBUSY; | |
2204 | +#define BAD_MAGIC KERN_ERR \ | |
2205 | +"md: invalid raid superblock magic on %s\n" | |
2206 | ||
2207 | - md_dev[minor].repartition=repart; | |
2208 | - | |
2209 | - if ((pnum=PERSONALITY(&md_dev[minor]) >> (PERSONALITY_SHIFT)) | |
2210 | - >= MAX_PERSONALITY) | |
2211 | - return -EINVAL; | |
2212 | - | |
2213 | - /* Only RAID-1 and RAID-5 can have MD devices as underlying devices */ | |
2214 | - if (pnum != (RAID1 >> PERSONALITY_SHIFT) && pnum != (RAID5 >> PERSONALITY_SHIFT)){ | |
2215 | - for (i = 0; i < md_dev [minor].nb_dev; i++) | |
2216 | - if (MAJOR (md_dev [minor].devices [i].dev) == MD_MAJOR) | |
2217 | - return -EINVAL; | |
2218 | - } | |
2219 | - if (!pers[pnum]) | |
2220 | - { | |
2221 | -#ifdef CONFIG_KMOD | |
2222 | - char module_name[80]; | |
2223 | - sprintf (module_name, "md-personality-%d", pnum); | |
2224 | - request_module (module_name); | |
2225 | - if (!pers[pnum]) | |
2226 | -#endif | |
2227 | - return -EINVAL; | |
2228 | - } | |
2229 | - | |
2230 | - factor = min = 1 << FACTOR_SHIFT(FACTOR((md_dev+minor))); | |
2231 | - | |
2232 | - md_blocksizes[minor] <<= FACTOR_SHIFT(FACTOR((md_dev+minor))); | |
2233 | +#define BAD_MINOR KERN_ERR \ | |
2234 | +"md: %s: invalid raid minor (%x)\n" | |
2235 | ||
2236 | - for (i=0; i<md_dev[minor].nb_dev; i++) | |
2237 | - if (md_dev[minor].devices[i].size<min) | |
2238 | - { | |
2239 | - printk ("Dev %s smaller than %dk, cannot shrink\n", | |
2240 | - partition_name (md_dev[minor].devices[i].dev), min); | |
2241 | - return -EINVAL; | |
2242 | - } | |
2243 | - | |
2244 | - for (i=0; i<md_dev[minor].nb_dev; i++) { | |
2245 | - fsync_dev(md_dev[minor].devices[i].dev); | |
2246 | - invalidate_buffers(md_dev[minor].devices[i].dev); | |
2247 | - } | |
2248 | - | |
2249 | - /* Resize devices according to the factor. It is used to align | |
2250 | - partitions size on a given chunk size. */ | |
2251 | - md_size[minor]=0; | |
2252 | - | |
2253 | - /* | |
2254 | - * Analyze the raid superblock | |
2255 | - */ | |
2256 | - if (analyze_sbs(minor, pnum)) | |
2257 | - return -EINVAL; | |
2258 | +#define OUT_OF_MEM KERN_ALERT \ | |
2259 | +"md: out of memory.\n" | |
2260 | ||
2261 | - md_dev[minor].pers=pers[pnum]; | |
2262 | - | |
2263 | - if ((err=md_dev[minor].pers->run (minor, md_dev+minor))) | |
2264 | - { | |
2265 | - md_dev[minor].pers=NULL; | |
2266 | - free_sb(md_dev + minor); | |
2267 | - return (err); | |
2268 | - } | |
2269 | - | |
2270 | - if (pnum != RAID0 >> PERSONALITY_SHIFT && pnum != LINEAR >> PERSONALITY_SHIFT) | |
2271 | - { | |
2272 | - md_dev[minor].sb->state &= ~(1 << MD_SB_CLEAN); | |
2273 | - md_update_sb(minor); | |
2274 | - } | |
2275 | - | |
2276 | - /* FIXME : We assume here we have blocks | |
2277 | - that are twice as large as sectors. | |
2278 | - THIS MAY NOT BE TRUE !!! */ | |
2279 | - md_hd_struct[minor].start_sect=0; | |
2280 | - md_hd_struct[minor].nr_sects=md_size[minor]<<1; | |
2281 | - | |
2282 | - read_ahead[MD_MAJOR] = 128; | |
2283 | - return (0); | |
2284 | -} | |
2285 | +#define NO_SB KERN_ERR \ | |
2286 | +"md: disabled device %s, could not read superblock.\n" | |
2287 | + | |
2288 | +#define BAD_CSUM KERN_WARNING \ | |
2289 | +"md: invalid superblock checksum on %s\n" | |
2290 | ||
2291 | -static int do_md_stop (int minor, struct inode *inode) | |
2292 | +static int alloc_array_sb (mddev_t * mddev) | |
2293 | { | |
2294 | - int i; | |
2295 | - | |
2296 | - if (inode->i_count>1 || md_dev[minor].busy>1) { | |
2297 | - /* | |
2298 | - * ioctl : one open channel | |
2299 | - */ | |
2300 | - printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", | |
2301 | - minor, inode->i_count, md_dev[minor].busy); | |
2302 | - return -EBUSY; | |
2303 | - } | |
2304 | - | |
2305 | - if (md_dev[minor].pers) { | |
2306 | - /* | |
2307 | - * It is safe to call stop here, it only frees private | |
2308 | - * data. Also, it tells us if a device is unstoppable | |
2309 | - * (eg. resyncing is in progress) | |
2310 | - */ | |
2311 | - if (md_dev[minor].pers->stop (minor, md_dev+minor)) | |
2312 | - return -EBUSY; | |
2313 | - /* | |
2314 | - * The device won't exist anymore -> flush it now | |
2315 | - */ | |
2316 | - fsync_dev (inode->i_rdev); | |
2317 | - invalidate_buffers (inode->i_rdev); | |
2318 | - if (md_dev[minor].sb) { | |
2319 | - md_dev[minor].sb->state |= 1 << MD_SB_CLEAN; | |
2320 | - md_update_sb(minor); | |
2321 | - } | |
2322 | + if (mddev->sb) { | |
2323 | + MD_BUG(); | |
2324 | + return 0; | |
2325 | } | |
2326 | - | |
2327 | - /* Remove locks. */ | |
2328 | - if (md_dev[minor].sb) | |
2329 | - free_sb(md_dev + minor); | |
2330 | - for (i=0; i<md_dev[minor].nb_dev; i++) | |
2331 | - clear_inode (md_dev[minor].devices[i].inode); | |
2332 | - | |
2333 | - md_dev[minor].nb_dev=md_size[minor]=0; | |
2334 | - md_hd_struct[minor].nr_sects=0; | |
2335 | - md_dev[minor].pers=NULL; | |
2336 | - | |
2337 | - read_ahead[MD_MAJOR] = 128; | |
2338 | - | |
2339 | - return (0); | |
2340 | + | |
2341 | + mddev->sb = (mdp_super_t *) __get_free_page (GFP_KERNEL); | |
2342 | + if (!mddev->sb) | |
2343 | + return -ENOMEM; | |
2344 | + md_clear_page((unsigned long)mddev->sb); | |
2345 | + return 0; | |
2346 | } | |
2347 | ||
2348 | -static int do_md_add (int minor, kdev_t dev) | |
2349 | +static int alloc_disk_sb (mdk_rdev_t * rdev) | |
2350 | { | |
2351 | - int i; | |
2352 | - int hot_add=0; | |
2353 | - struct real_dev *realdev; | |
2354 | + if (rdev->sb) | |
2355 | + MD_BUG(); | |
2356 | ||
2357 | - if (md_dev[minor].nb_dev==MAX_REAL) | |
2358 | + rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); | |
2359 | + if (!rdev->sb) { | |
2360 | + printk (OUT_OF_MEM); | |
2361 | return -EINVAL; | |
2362 | + } | |
2363 | + md_clear_page((unsigned long)rdev->sb); | |
2364 | ||
2365 | - if (!fs_may_mount (dev)) | |
2366 | - return -EBUSY; | |
2367 | + return 0; | |
2368 | +} | |
2369 | ||
2370 | - if (blk_size[MAJOR(dev)] == NULL || blk_size[MAJOR(dev)][MINOR(dev)] == 0) { | |
2371 | - printk("md_add(): zero device size, huh, bailing out.\n"); | |
2372 | - return -EINVAL; | |
2373 | +static void free_disk_sb (mdk_rdev_t * rdev) | |
2374 | +{ | |
2375 | + if (rdev->sb) { | |
2376 | + free_page((unsigned long) rdev->sb); | |
2377 | + rdev->sb = NULL; | |
2378 | + rdev->sb_offset = 0; | |
2379 | + rdev->size = 0; | |
2380 | + } else { | |
2381 | + if (!rdev->faulty) | |
2382 | + MD_BUG(); | |
2383 | } | |
2384 | +} | |
2385 | ||
2386 | - if (md_dev[minor].pers) { | |
2387 | - /* | |
2388 | - * The array is already running, hot-add the drive, or | |
2389 | - * bail out: | |
2390 | - */ | |
2391 | - if (!md_dev[minor].pers->hot_add_disk) | |
2392 | - return -EBUSY; | |
2393 | - else | |
2394 | - hot_add=1; | |
2395 | +static void mark_rdev_faulty (mdk_rdev_t * rdev) | |
2396 | +{ | |
2397 | + unsigned long flags; | |
2398 | + | |
2399 | + if (!rdev) { | |
2400 | + MD_BUG(); | |
2401 | + return; | |
2402 | } | |
2403 | + save_flags(flags); | |
2404 | + cli(); | |
2405 | + free_disk_sb(rdev); | |
2406 | + rdev->faulty = 1; | |
2407 | + restore_flags(flags); | |
2408 | +} | |
2409 | + | |
2410 | +static int read_disk_sb (mdk_rdev_t * rdev) | |
2411 | +{ | |
2412 | + int ret = -EINVAL; | |
2413 | + struct buffer_head *bh = NULL; | |
2414 | + kdev_t dev = rdev->dev; | |
2415 | + mdp_super_t *sb; | |
2416 | + u32 sb_offset; | |
2417 | ||
2418 | + if (!rdev->sb) { | |
2419 | + MD_BUG(); | |
2420 | + goto abort; | |
2421 | + } | |
2422 | + | |
2423 | /* | |
2424 | - * Careful. We cannot increase nb_dev for a running array. | |
2425 | + * Calculate the position of the superblock, | |
2426 | + * it's at the end of the disk | |
2427 | */ | |
2428 | - i=md_dev[minor].nb_dev; | |
2429 | - realdev = &md_dev[minor].devices[i]; | |
2430 | - realdev->dev=dev; | |
2431 | - | |
2432 | - /* Lock the device by inserting a dummy inode. This doesn't | |
2433 | - smell very good, but I need to be consistent with the | |
2434 | - mount stuff, specially with fs_may_mount. If someone have | |
2435 | - a better idea, please help ! */ | |
2436 | - | |
2437 | - realdev->inode=get_empty_inode (); | |
2438 | - realdev->inode->i_dev=dev; /* don't care about other fields */ | |
2439 | - insert_inode_hash (realdev->inode); | |
2440 | - | |
2441 | - /* Sizes are now rounded at run time */ | |
2442 | - | |
2443 | -/* md_dev[minor].devices[i].size=gen_real->sizes[MINOR(dev)]; HACKHACK*/ | |
2444 | - | |
2445 | - realdev->size=blk_size[MAJOR(dev)][MINOR(dev)]; | |
2446 | + sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1); | |
2447 | + rdev->sb_offset = sb_offset; | |
2448 | + printk("(read) %s's sb offset: %d", partition_name(dev), | |
2449 | + sb_offset); | |
2450 | + fsync_dev(dev); | |
2451 | + set_blocksize (dev, MD_SB_BYTES); | |
2452 | + bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
2453 | ||
2454 | - if (hot_add) { | |
2455 | - /* | |
2456 | - * Check the superblock for consistency. | |
2457 | - * The personality itself has to check whether it's getting | |
2458 | - * added with the proper flags. The personality has to be | |
2459 | - * checked too. ;) | |
2460 | + if (bh) { | |
2461 | + sb = (mdp_super_t *) bh->b_data; | |
2462 | + memcpy (rdev->sb, sb, MD_SB_BYTES); | |
2463 | + } else { | |
2464 | + printk (NO_SB,partition_name(rdev->dev)); | |
2465 | + goto abort; | |
2466 | + } | |
2467 | + printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events)); | |
2468 | + ret = 0; | |
2469 | +abort: | |
2470 | + if (bh) | |
2471 | + brelse (bh); | |
2472 | + return ret; | |
2473 | +} | |
2474 | + | |
2475 | +static unsigned int calc_sb_csum (mdp_super_t * sb) | |
2476 | +{ | |
2477 | + unsigned int disk_csum, csum; | |
2478 | + | |
2479 | + disk_csum = sb->sb_csum; | |
2480 | + sb->sb_csum = 0; | |
2481 | + csum = csum_partial((void *)sb, MD_SB_BYTES, 0); | |
2482 | + sb->sb_csum = disk_csum; | |
2483 | + return csum; | |
2484 | +} | |
2485 | + | |
2486 | +/* | |
2487 | + * Check one RAID superblock for generic plausibility | |
2488 | + */ | |
2489 | + | |
2490 | +static int check_disk_sb (mdk_rdev_t * rdev) | |
2491 | +{ | |
2492 | + mdp_super_t *sb; | |
2493 | + int ret = -EINVAL; | |
2494 | + | |
2495 | + sb = rdev->sb; | |
2496 | + if (!sb) { | |
2497 | + MD_BUG(); | |
2498 | + goto abort; | |
2499 | + } | |
2500 | + | |
2501 | + if (sb->md_magic != MD_SB_MAGIC) { | |
2502 | + printk (BAD_MAGIC, partition_name(rdev->dev)); | |
2503 | + goto abort; | |
2504 | + } | |
2505 | + | |
2506 | + if (sb->md_minor >= MAX_MD_DEVS) { | |
2507 | + printk (BAD_MINOR, partition_name(rdev->dev), | |
2508 | + sb->md_minor); | |
2509 | + goto abort; | |
2510 | + } | |
2511 | + | |
2512 | + if (calc_sb_csum(sb) != sb->sb_csum) | |
2513 | + printk(BAD_CSUM, partition_name(rdev->dev)); | |
2514 | + ret = 0; | |
2515 | +abort: | |
2516 | + return ret; | |
2517 | +} | |
2518 | + | |
2519 | +static kdev_t dev_unit(kdev_t dev) | |
2520 | +{ | |
2521 | + unsigned int mask; | |
2522 | + struct gendisk *hd = find_gendisk(dev); | |
2523 | + | |
2524 | + if (!hd) | |
2525 | + return 0; | |
2526 | + mask = ~((1 << hd->minor_shift) - 1); | |
2527 | + | |
2528 | + return MKDEV(MAJOR(dev), MINOR(dev) & mask); | |
2529 | +} | |
2530 | + | |
2531 | +static mdk_rdev_t * match_dev_unit(mddev_t *mddev, kdev_t dev) | |
2532 | +{ | |
2533 | + struct md_list_head *tmp; | |
2534 | + mdk_rdev_t *rdev; | |
2535 | + | |
2536 | + ITERATE_RDEV(mddev,rdev,tmp) | |
2537 | + if (dev_unit(rdev->dev) == dev_unit(dev)) | |
2538 | + return rdev; | |
2539 | + | |
2540 | + return NULL; | |
2541 | +} | |
2542 | + | |
2543 | +static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |
2544 | +{ | |
2545 | + struct md_list_head *tmp; | |
2546 | + mdk_rdev_t *rdev; | |
2547 | + | |
2548 | + ITERATE_RDEV(mddev1,rdev,tmp) | |
2549 | + if (match_dev_unit(mddev2, rdev->dev)) | |
2550 | + return 1; | |
2551 | + | |
2552 | + return 0; | |
2553 | +} | |
2554 | + | |
2555 | +static MD_LIST_HEAD(all_raid_disks); | |
2556 | +static MD_LIST_HEAD(pending_raid_disks); | |
2557 | + | |
2558 | +static void bind_rdev_to_array (mdk_rdev_t * rdev, mddev_t * mddev) | |
2559 | +{ | |
2560 | + mdk_rdev_t *same_pdev; | |
2561 | + | |
2562 | + if (rdev->mddev) { | |
2563 | + MD_BUG(); | |
2564 | + return; | |
2565 | + } | |
2566 | + same_pdev = match_dev_unit(mddev, rdev->dev); | |
2567 | + if (same_pdev) | |
2568 | + printk( KERN_WARNING | |
2569 | +"md%d: WARNING: %s appears to be on the same physical disk as %s. True\n" | |
2570 | +" protection against single-disk failure might be compromised.\n", | |
2571 | + mdidx(mddev), partition_name(rdev->dev), | |
2572 | + partition_name(same_pdev->dev)); | |
2573 | + | |
2574 | + md_list_add(&rdev->same_set, &mddev->disks); | |
2575 | + rdev->mddev = mddev; | |
2576 | + mddev->nb_dev++; | |
2577 | + printk("bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev); | |
2578 | +} | |
2579 | + | |
2580 | +static void unbind_rdev_from_array (mdk_rdev_t * rdev) | |
2581 | +{ | |
2582 | + if (!rdev->mddev) { | |
2583 | + MD_BUG(); | |
2584 | + return; | |
2585 | + } | |
2586 | + md_list_del(&rdev->same_set); | |
2587 | + MD_INIT_LIST_HEAD(&rdev->same_set); | |
2588 | + rdev->mddev->nb_dev--; | |
2589 | + printk("unbind<%s,%d>\n", partition_name(rdev->dev), | |
2590 | + rdev->mddev->nb_dev); | |
2591 | + rdev->mddev = NULL; | |
2592 | +} | |
2593 | + | |
2594 | +/* | |
2595 | + * prevent the device from being mounted, repartitioned or | |
2596 | + * otherwise reused by a RAID array (or any other kernel | |
2597 | + * subsystem), by opening the device. [simply getting an | |
2598 | + * inode is not enough, the SCSI module usage code needs | |
2599 | + * an explicit open() on the device] | |
2600 | + */ | |
2601 | +static int lock_rdev (mdk_rdev_t *rdev) | |
2602 | +{ | |
2603 | + int err = 0; | |
2604 | + | |
2605 | + /* | |
2606 | + * First insert a dummy inode. | |
2607 | + */ | |
2608 | + if (rdev->inode) | |
2609 | + MD_BUG(); | |
2610 | + rdev->inode = get_empty_inode(); | |
2611 | + /* | |
2612 | + * we dont care about any other fields | |
2613 | + */ | |
2614 | + rdev->inode->i_dev = rdev->inode->i_rdev = rdev->dev; | |
2615 | + insert_inode_hash(rdev->inode); | |
2616 | + | |
2617 | + memset(&rdev->filp, 0, sizeof(rdev->filp)); | |
2618 | + rdev->filp.f_mode = 3; /* read write */ | |
2619 | + err = blkdev_open(rdev->inode, &rdev->filp); | |
2620 | + if (err) { | |
2621 | + printk("blkdev_open() failed: %d\n", err); | |
2622 | + clear_inode(rdev->inode); | |
2623 | + rdev->inode = NULL; | |
2624 | + } | |
2625 | + return err; | |
2626 | +} | |
2627 | + | |
2628 | +static void unlock_rdev (mdk_rdev_t *rdev) | |
2629 | +{ | |
2630 | + blkdev_release(rdev->inode); | |
2631 | + if (!rdev->inode) | |
2632 | + MD_BUG(); | |
2633 | + clear_inode(rdev->inode); | |
2634 | + rdev->inode = NULL; | |
2635 | +} | |
2636 | + | |
2637 | +static void export_rdev (mdk_rdev_t * rdev) | |
2638 | +{ | |
2639 | + printk("export_rdev(%s)\n",partition_name(rdev->dev)); | |
2640 | + if (rdev->mddev) | |
2641 | + MD_BUG(); | |
2642 | + unlock_rdev(rdev); | |
2643 | + free_disk_sb(rdev); | |
2644 | + md_list_del(&rdev->all); | |
2645 | + MD_INIT_LIST_HEAD(&rdev->all); | |
2646 | + if (rdev->pending.next != &rdev->pending) { | |
2647 | + printk("(%s was pending)\n",partition_name(rdev->dev)); | |
2648 | + md_list_del(&rdev->pending); | |
2649 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
2650 | + } | |
2651 | + rdev->dev = 0; | |
2652 | + rdev->faulty = 0; | |
2653 | + kfree(rdev); | |
2654 | +} | |
2655 | + | |
2656 | +static void kick_rdev_from_array (mdk_rdev_t * rdev) | |
2657 | +{ | |
2658 | + unbind_rdev_from_array(rdev); | |
2659 | + export_rdev(rdev); | |
2660 | +} | |
2661 | + | |
2662 | +static void export_array (mddev_t *mddev) | |
2663 | +{ | |
2664 | + struct md_list_head *tmp; | |
2665 | + mdk_rdev_t *rdev; | |
2666 | + mdp_super_t *sb = mddev->sb; | |
2667 | + | |
2668 | + if (mddev->sb) { | |
2669 | + mddev->sb = NULL; | |
2670 | + free_page((unsigned long) sb); | |
2671 | + } | |
2672 | + | |
2673 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2674 | + if (!rdev->mddev) { | |
2675 | + MD_BUG(); | |
2676 | + continue; | |
2677 | + } | |
2678 | + kick_rdev_from_array(rdev); | |
2679 | + } | |
2680 | + if (mddev->nb_dev) | |
2681 | + MD_BUG(); | |
2682 | +} | |
2683 | + | |
2684 | +#undef BAD_CSUM | |
2685 | +#undef BAD_MAGIC | |
2686 | +#undef OUT_OF_MEM | |
2687 | +#undef NO_SB | |
2688 | + | |
2689 | +static void print_desc(mdp_disk_t *desc) | |
2690 | +{ | |
2691 | + printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number, | |
2692 | + partition_name(MKDEV(desc->major,desc->minor)), | |
2693 | + desc->major,desc->minor,desc->raid_disk,desc->state); | |
2694 | +} | |
2695 | + | |
2696 | +static void print_sb(mdp_super_t *sb) | |
2697 | +{ | |
2698 | + int i; | |
2699 | + | |
2700 | + printk(" SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", | |
2701 | + sb->major_version, sb->minor_version, sb->patch_version, | |
2702 | + sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, | |
2703 | + sb->ctime); | |
2704 | + printk(" L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level, | |
2705 | + sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor, | |
2706 | + sb->layout, sb->chunk_size); | |
2707 | + printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n", | |
2708 | + sb->utime, sb->state, sb->active_disks, sb->working_disks, | |
2709 | + sb->failed_disks, sb->spare_disks, | |
2710 | + sb->sb_csum, (unsigned long)get_unaligned(&sb->events)); | |
2711 | + | |
2712 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
2713 | + mdp_disk_t *desc; | |
2714 | + | |
2715 | + desc = sb->disks + i; | |
2716 | + printk(" D %2d: ", i); | |
2717 | + print_desc(desc); | |
2718 | + } | |
2719 | + printk(" THIS: "); | |
2720 | + print_desc(&sb->this_disk); | |
2721 | + | |
2722 | +} | |
2723 | + | |
2724 | +static void print_rdev(mdk_rdev_t *rdev) | |
2725 | +{ | |
2726 | + printk(" rdev %s: O:%s, SZ:%08d F:%d DN:%d ", | |
2727 | + partition_name(rdev->dev), partition_name(rdev->old_dev), | |
2728 | + rdev->size, rdev->faulty, rdev->desc_nr); | |
2729 | + if (rdev->sb) { | |
2730 | + printk("rdev superblock:\n"); | |
2731 | + print_sb(rdev->sb); | |
2732 | + } else | |
2733 | + printk("no rdev superblock!\n"); | |
2734 | +} | |
2735 | + | |
2736 | +void md_print_devices (void) | |
2737 | +{ | |
2738 | + struct md_list_head *tmp, *tmp2; | |
2739 | + mdk_rdev_t *rdev; | |
2740 | + mddev_t *mddev; | |
2741 | + | |
2742 | + printk("\n"); | |
2743 | + printk(" **********************************\n"); | |
2744 | + printk(" * <COMPLETE RAID STATE PRINTOUT> *\n"); | |
2745 | + printk(" **********************************\n"); | |
2746 | + ITERATE_MDDEV(mddev,tmp) { | |
2747 | + printk("md%d: ", mdidx(mddev)); | |
2748 | + | |
2749 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
2750 | + printk("<%s>", partition_name(rdev->dev)); | |
2751 | + | |
2752 | + if (mddev->sb) { | |
2753 | + printk(" array superblock:\n"); | |
2754 | + print_sb(mddev->sb); | |
2755 | + } else | |
2756 | + printk(" no array superblock.\n"); | |
2757 | + | |
2758 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
2759 | + print_rdev(rdev); | |
2760 | + } | |
2761 | + printk(" **********************************\n"); | |
2762 | + printk("\n"); | |
2763 | +} | |
2764 | + | |
2765 | +static int sb_equal ( mdp_super_t *sb1, mdp_super_t *sb2) | |
2766 | +{ | |
2767 | + int ret; | |
2768 | + mdp_super_t *tmp1, *tmp2; | |
2769 | + | |
2770 | + tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); | |
2771 | + tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); | |
2772 | + | |
2773 | + if (!tmp1 || !tmp2) { | |
2774 | + ret = 0; | |
2775 | + goto abort; | |
2776 | + } | |
2777 | + | |
2778 | + *tmp1 = *sb1; | |
2779 | + *tmp2 = *sb2; | |
2780 | + | |
2781 | + /* | |
2782 | + * nr_disks is not constant | |
2783 | + */ | |
2784 | + tmp1->nr_disks = 0; | |
2785 | + tmp2->nr_disks = 0; | |
2786 | + | |
2787 | + if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) | |
2788 | + ret = 0; | |
2789 | + else | |
2790 | + ret = 1; | |
2791 | + | |
2792 | +abort: | |
2793 | + if (tmp1) | |
2794 | + kfree(tmp1); | |
2795 | + if (tmp2) | |
2796 | + kfree(tmp2); | |
2797 | + | |
2798 | + return ret; | |
2799 | +} | |
2800 | + | |
2801 | +static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2) | |
2802 | +{ | |
2803 | + if ( (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) && | |
2804 | + (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) && | |
2805 | + (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) && | |
2806 | + (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3)) | |
2807 | + | |
2808 | + return 1; | |
2809 | + | |
2810 | + return 0; | |
2811 | +} | |
2812 | + | |
2813 | +static mdk_rdev_t * find_rdev_all (kdev_t dev) | |
2814 | +{ | |
2815 | + struct md_list_head *tmp; | |
2816 | + mdk_rdev_t *rdev; | |
2817 | + | |
2818 | + tmp = all_raid_disks.next; | |
2819 | + while (tmp != &all_raid_disks) { | |
2820 | + rdev = md_list_entry(tmp, mdk_rdev_t, all); | |
2821 | + if (rdev->dev == dev) | |
2822 | + return rdev; | |
2823 | + tmp = tmp->next; | |
2824 | + } | |
2825 | + return NULL; | |
2826 | +} | |
2827 | + | |
2828 | +#define GETBLK_FAILED KERN_ERR \ | |
2829 | +"md: getblk failed for device %s\n" | |
2830 | + | |
2831 | +static int write_disk_sb(mdk_rdev_t * rdev) | |
2832 | +{ | |
2833 | + struct buffer_head *bh; | |
2834 | + kdev_t dev; | |
2835 | + u32 sb_offset, size; | |
2836 | + mdp_super_t *sb; | |
2837 | + | |
2838 | + if (!rdev->sb) { | |
2839 | + MD_BUG(); | |
2840 | + return -1; | |
2841 | + } | |
2842 | + if (rdev->faulty) { | |
2843 | + MD_BUG(); | |
2844 | + return -1; | |
2845 | + } | |
2846 | + if (rdev->sb->md_magic != MD_SB_MAGIC) { | |
2847 | + MD_BUG(); | |
2848 | + return -1; | |
2849 | + } | |
2850 | + | |
2851 | + dev = rdev->dev; | |
2852 | + sb_offset = calc_dev_sboffset(dev, rdev->mddev, 1); | |
2853 | + if (rdev->sb_offset != sb_offset) { | |
2854 | + printk("%s's sb offset has changed from %d to %d, skipping\n", partition_name(dev), rdev->sb_offset, sb_offset); | |
2855 | + goto skip; | |
2856 | + } | |
2857 | + /* | |
2858 | + * If the disk went offline meanwhile and it's just a spare, then | |
2859 | + * its size has changed to zero silently, and the MD code does | |
2860 | + * not yet know that it's faulty. | |
2861 | + */ | |
2862 | + size = calc_dev_size(dev, rdev->mddev, 1); | |
2863 | + if (size != rdev->size) { | |
2864 | + printk("%s's size has changed from %d to %d since import, skipping\n", partition_name(dev), rdev->size, size); | |
2865 | + goto skip; | |
2866 | + } | |
2867 | + | |
2868 | + printk("(write) %s's sb offset: %d\n", partition_name(dev), sb_offset); | |
2869 | + fsync_dev(dev); | |
2870 | + set_blocksize(dev, MD_SB_BYTES); | |
2871 | + bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
2872 | + if (!bh) { | |
2873 | + printk(GETBLK_FAILED, partition_name(dev)); | |
2874 | + return 1; | |
2875 | + } | |
2876 | + memset(bh->b_data,0,bh->b_size); | |
2877 | + sb = (mdp_super_t *) bh->b_data; | |
2878 | + memcpy(sb, rdev->sb, MD_SB_BYTES); | |
2879 | + | |
2880 | + mark_buffer_uptodate(bh, 1); | |
2881 | + mark_buffer_dirty(bh, 1); | |
2882 | + ll_rw_block(WRITE, 1, &bh); | |
2883 | + wait_on_buffer(bh); | |
2884 | + brelse(bh); | |
2885 | + fsync_dev(dev); | |
2886 | +skip: | |
2887 | + return 0; | |
2888 | +} | |
2889 | +#undef GETBLK_FAILED KERN_ERR | |
2890 | + | |
2891 | +static void set_this_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |
2892 | +{ | |
2893 | + int i, ok = 0; | |
2894 | + mdp_disk_t *desc; | |
2895 | + | |
2896 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
2897 | + desc = mddev->sb->disks + i; | |
2898 | +#if 0 | |
2899 | + if (disk_faulty(desc)) { | |
2900 | + if (MKDEV(desc->major,desc->minor) == rdev->dev) | |
2901 | + ok = 1; | |
2902 | + continue; | |
2903 | + } | |
2904 | +#endif | |
2905 | + if (MKDEV(desc->major,desc->minor) == rdev->dev) { | |
2906 | + rdev->sb->this_disk = *desc; | |
2907 | + rdev->desc_nr = desc->number; | |
2908 | + ok = 1; | |
2909 | + break; | |
2910 | + } | |
2911 | + } | |
2912 | + | |
2913 | + if (!ok) { | |
2914 | + MD_BUG(); | |
2915 | + } | |
2916 | +} | |
2917 | + | |
2918 | +static int sync_sbs(mddev_t * mddev) | |
2919 | +{ | |
2920 | + mdk_rdev_t *rdev; | |
2921 | + mdp_super_t *sb; | |
2922 | + struct md_list_head *tmp; | |
2923 | + | |
2924 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2925 | + if (rdev->faulty) | |
2926 | + continue; | |
2927 | + sb = rdev->sb; | |
2928 | + *sb = *mddev->sb; | |
2929 | + set_this_disk(mddev, rdev); | |
2930 | + sb->sb_csum = calc_sb_csum(sb); | |
2931 | + } | |
2932 | + return 0; | |
2933 | +} | |
2934 | + | |
2935 | +int md_update_sb(mddev_t * mddev) | |
2936 | +{ | |
2937 | + int first, err, count = 100; | |
2938 | + struct md_list_head *tmp; | |
2939 | + mdk_rdev_t *rdev; | |
2940 | + __u64 ev; | |
2941 | + | |
2942 | +repeat: | |
2943 | + mddev->sb->utime = CURRENT_TIME; | |
2944 | + ev = get_unaligned(&mddev->sb->events); | |
2945 | + ++ev; | |
2946 | + put_unaligned(ev,&mddev->sb->events); | |
2947 | + if (ev == (__u64)0) { | |
2948 | + /* | |
2949 | + * oops, this 64-bit counter should never wrap. | |
2950 | + * Either we are in around ~1 trillion A.C., assuming | |
2951 | + * 1 reboot per second, or we have a bug: | |
2952 | + */ | |
2953 | + MD_BUG(); | |
2954 | + --ev; | |
2955 | + put_unaligned(ev,&mddev->sb->events); | |
2956 | + } | |
2957 | + sync_sbs(mddev); | |
2958 | + | |
2959 | + /* | |
2960 | + * do not write anything to disk if using | |
2961 | + * nonpersistent superblocks | |
2962 | + */ | |
2963 | + if (mddev->sb->not_persistent) | |
2964 | + return 0; | |
2965 | + | |
2966 | + printk(KERN_INFO "md: updating md%d RAID superblock on device\n", | |
2967 | + mdidx(mddev)); | |
2968 | + | |
2969 | + first = 1; | |
2970 | + err = 0; | |
2971 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
2972 | + if (!first) { | |
2973 | + first = 0; | |
2974 | + printk(", "); | |
2975 | + } | |
2976 | + if (rdev->faulty) | |
2977 | + printk("(skipping faulty "); | |
2978 | + printk("%s ", partition_name(rdev->dev)); | |
2979 | + if (!rdev->faulty) { | |
2980 | + printk("[events: %08lx]", | |
2981 | + (unsigned long)get_unaligned(&rdev->sb->events)); | |
2982 | + err += write_disk_sb(rdev); | |
2983 | + } else | |
2984 | + printk(")\n"); | |
2985 | + } | |
2986 | + printk(".\n"); | |
2987 | + if (err) { | |
2988 | + printk("errors occured during superblock update, repeating\n"); | |
2989 | + if (--count) | |
2990 | + goto repeat; | |
2991 | + printk("excessive errors occured during superblock update, exiting\n"); | |
2992 | + } | |
2993 | + return 0; | |
2994 | +} | |
2995 | + | |
2996 | +/* | |
2997 | + * Import a device. If 'on_disk', then sanity check the superblock | |
2998 | + * | |
2999 | + * mark the device faulty if: | |
3000 | + * | |
3001 | + * - the device is nonexistent (zero size) | |
3002 | + * - the device has no valid superblock | |
3003 | + * | |
3004 | + * a faulty rdev _never_ has rdev->sb set. | |
3005 | + */ | |
3006 | +static int md_import_device (kdev_t newdev, int on_disk) | |
3007 | +{ | |
3008 | + int err; | |
3009 | + mdk_rdev_t *rdev; | |
3010 | + unsigned int size; | |
3011 | + | |
3012 | + if (find_rdev_all(newdev)) | |
3013 | + return -EEXIST; | |
3014 | + | |
3015 | + rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); | |
3016 | + if (!rdev) { | |
3017 | + printk("could not alloc mem for %s!\n", partition_name(newdev)); | |
3018 | + return -ENOMEM; | |
3019 | + } | |
3020 | + memset(rdev, 0, sizeof(*rdev)); | |
3021 | + | |
3022 | + if (!fs_may_mount(newdev)) { | |
3023 | + printk("md: can not import %s, has active inodes!\n", | |
3024 | + partition_name(newdev)); | |
3025 | + err = -EBUSY; | |
3026 | + goto abort_free; | |
3027 | + } | |
3028 | + | |
3029 | + if ((err = alloc_disk_sb(rdev))) | |
3030 | + goto abort_free; | |
3031 | + | |
3032 | + rdev->dev = newdev; | |
3033 | + if (lock_rdev(rdev)) { | |
3034 | + printk("md: could not lock %s, zero-size? Marking faulty.\n", | |
3035 | + partition_name(newdev)); | |
3036 | + err = -EINVAL; | |
3037 | + goto abort_free; | |
3038 | + } | |
3039 | + rdev->desc_nr = -1; | |
3040 | + rdev->faulty = 0; | |
3041 | + | |
3042 | + size = 0; | |
3043 | + if (blk_size[MAJOR(newdev)]) | |
3044 | + size = blk_size[MAJOR(newdev)][MINOR(newdev)]; | |
3045 | + if (!size) { | |
3046 | + printk("md: %s has zero size, marking faulty!\n", | |
3047 | + partition_name(newdev)); | |
3048 | + err = -EINVAL; | |
3049 | + goto abort_free; | |
3050 | + } | |
3051 | + | |
3052 | + if (on_disk) { | |
3053 | + if ((err = read_disk_sb(rdev))) { | |
3054 | + printk("md: could not read %s's sb, not importing!\n", | |
3055 | + partition_name(newdev)); | |
3056 | + goto abort_free; | |
3057 | + } | |
3058 | + if ((err = check_disk_sb(rdev))) { | |
3059 | + printk("md: %s has invalid sb, not importing!\n", | |
3060 | + partition_name(newdev)); | |
3061 | + goto abort_free; | |
3062 | + } | |
3063 | + | |
3064 | + rdev->old_dev = MKDEV(rdev->sb->this_disk.major, | |
3065 | + rdev->sb->this_disk.minor); | |
3066 | + rdev->desc_nr = rdev->sb->this_disk.number; | |
3067 | + } | |
3068 | + md_list_add(&rdev->all, &all_raid_disks); | |
3069 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
3070 | + | |
3071 | + if (rdev->faulty && rdev->sb) | |
3072 | + free_disk_sb(rdev); | |
3073 | + return 0; | |
3074 | + | |
3075 | +abort_free: | |
3076 | + if (rdev->sb) { | |
3077 | + if (rdev->inode) | |
3078 | + unlock_rdev(rdev); | |
3079 | + free_disk_sb(rdev); | |
3080 | + } | |
3081 | + kfree(rdev); | |
3082 | + return err; | |
3083 | +} | |
3084 | + | |
3085 | +/* | |
3086 | + * Check a full RAID array for plausibility | |
3087 | + */ | |
3088 | + | |
3089 | +#define INCONSISTENT KERN_ERR \ | |
3090 | +"md: fatal superblock inconsistency in %s -- removing from array\n" | |
3091 | + | |
3092 | +#define OUT_OF_DATE KERN_ERR \ | |
3093 | +"md: superblock update time inconsistency -- using the most recent one\n" | |
3094 | + | |
3095 | +#define OLD_VERSION KERN_ALERT \ | |
3096 | +"md: md%d: unsupported raid array version %d.%d.%d\n" | |
3097 | + | |
3098 | +#define NOT_CLEAN_IGNORE KERN_ERR \ | |
3099 | +"md: md%d: raid array is not clean -- starting background reconstruction\n" | |
3100 | + | |
3101 | +#define UNKNOWN_LEVEL KERN_ERR \ | |
3102 | +"md: md%d: unsupported raid level %d\n" | |
3103 | + | |
3104 | +static int analyze_sbs (mddev_t * mddev) | |
3105 | +{ | |
3106 | + int out_of_date = 0, i; | |
3107 | + struct md_list_head *tmp, *tmp2; | |
3108 | + mdk_rdev_t *rdev, *rdev2, *freshest; | |
3109 | + mdp_super_t *sb; | |
3110 | + | |
3111 | + /* | |
3112 | + * Verify the RAID superblock on each real device | |
3113 | + */ | |
3114 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3115 | + if (rdev->faulty) { | |
3116 | + MD_BUG(); | |
3117 | + goto abort; | |
3118 | + } | |
3119 | + if (!rdev->sb) { | |
3120 | + MD_BUG(); | |
3121 | + goto abort; | |
3122 | + } | |
3123 | + if (check_disk_sb(rdev)) | |
3124 | + goto abort; | |
3125 | + } | |
3126 | + | |
3127 | + /* | |
3128 | + * The superblock constant part has to be the same | |
3129 | + * for all disks in the array. | |
3130 | + */ | |
3131 | + sb = NULL; | |
3132 | + | |
3133 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3134 | + if (!sb) { | |
3135 | + sb = rdev->sb; | |
3136 | + continue; | |
3137 | + } | |
3138 | + if (!sb_equal(sb, rdev->sb)) { | |
3139 | + printk (INCONSISTENT, partition_name(rdev->dev)); | |
3140 | + kick_rdev_from_array(rdev); | |
3141 | + continue; | |
3142 | + } | |
3143 | + } | |
3144 | + | |
3145 | + /* | |
3146 | + * OK, we have all disks and the array is ready to run. Let's | |
3147 | + * find the freshest superblock, that one will be the superblock | |
3148 | + * that represents the whole array. | |
3149 | + */ | |
3150 | + if (!mddev->sb) | |
3151 | + if (alloc_array_sb(mddev)) | |
3152 | + goto abort; | |
3153 | + sb = mddev->sb; | |
3154 | + freshest = NULL; | |
3155 | + | |
3156 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3157 | + __u64 ev1, ev2; | |
3158 | + /* | |
3159 | + * if the checksum is invalid, use the superblock | |
3160 | + * only as a last resort. (decrease its age by | |
3161 | + * one event) | |
3162 | + */ | |
3163 | + if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) { | |
3164 | + __u64 ev = get_unaligned(&rdev->sb->events); | |
3165 | + if (ev != (__u64)0) { | |
3166 | + --ev; | |
3167 | + put_unaligned(ev,&rdev->sb->events); | |
3168 | + } | |
3169 | + } | |
3170 | + | |
3171 | + printk("%s's event counter: %08lx\n", partition_name(rdev->dev), | |
3172 | + (unsigned long)get_unaligned(&rdev->sb->events)); | |
3173 | + if (!freshest) { | |
3174 | + freshest = rdev; | |
3175 | + continue; | |
3176 | + } | |
3177 | + /* | |
3178 | + * Find the newest superblock version | |
3179 | + */ | |
3180 | + ev1 = get_unaligned(&rdev->sb->events); | |
3181 | + ev2 = get_unaligned(&freshest->sb->events); | |
3182 | + if (ev1 != ev2) { | |
3183 | + out_of_date = 1; | |
3184 | + if (ev1 > ev2) | |
3185 | + freshest = rdev; | |
3186 | + } | |
3187 | + } | |
3188 | + if (out_of_date) { | |
3189 | + printk(OUT_OF_DATE); | |
3190 | + printk("freshest: %s\n", partition_name(freshest->dev)); | |
3191 | + } | |
3192 | + memcpy (sb, freshest->sb, sizeof(*sb)); | |
3193 | + | |
3194 | + /* | |
3195 | + * at this point we have picked the 'best' superblock | |
3196 | + * from all available superblocks. | |
3197 | + * now we validate this superblock and kick out possibly | |
3198 | + * failed disks. | |
3199 | + */ | |
3200 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3201 | + /* | |
3202 | + * Kick all non-fresh devices faulty | |
3203 | + */ | |
3204 | + __u64 ev1, ev2; | |
3205 | + ev1 = get_unaligned(&rdev->sb->events); | |
3206 | + ev2 = get_unaligned(&sb->events); | |
3207 | + ++ev1; | |
3208 | + if (ev1 < ev2) { | |
3209 | + printk("md: kicking non-fresh %s from array!\n", | |
3210 | + partition_name(rdev->dev)); | |
3211 | + kick_rdev_from_array(rdev); | |
3212 | + continue; | |
3213 | + } | |
3214 | + } | |
3215 | + | |
3216 | + /* | |
3217 | + * Fix up changed device names ... but only if this disk has a | |
3218 | + * recent update time. Use faulty checksum ones too. | |
3219 | + */ | |
3220 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3221 | + __u64 ev1, ev2, ev3; | |
3222 | + if (rdev->faulty) { /* REMOVEME */ | |
3223 | + MD_BUG(); | |
3224 | + goto abort; | |
3225 | + } | |
3226 | + ev1 = get_unaligned(&rdev->sb->events); | |
3227 | + ev2 = get_unaligned(&sb->events); | |
3228 | + ev3 = ev2; | |
3229 | + --ev3; | |
3230 | + if ((rdev->dev != rdev->old_dev) && | |
3231 | + ((ev1 == ev2) || (ev1 == ev3))) { | |
3232 | + mdp_disk_t *desc; | |
3233 | + | |
3234 | + printk("md: device name has changed from %s to %s since last import!\n", partition_name(rdev->old_dev), partition_name(rdev->dev)); | |
3235 | + if (rdev->desc_nr == -1) { | |
3236 | + MD_BUG(); | |
3237 | + goto abort; | |
3238 | + } | |
3239 | + desc = &sb->disks[rdev->desc_nr]; | |
3240 | + if (rdev->old_dev != MKDEV(desc->major, desc->minor)) { | |
3241 | + MD_BUG(); | |
3242 | + goto abort; | |
3243 | + } | |
3244 | + desc->major = MAJOR(rdev->dev); | |
3245 | + desc->minor = MINOR(rdev->dev); | |
3246 | + desc = &rdev->sb->this_disk; | |
3247 | + desc->major = MAJOR(rdev->dev); | |
3248 | + desc->minor = MINOR(rdev->dev); | |
3249 | + } | |
3250 | + } | |
3251 | + | |
3252 | + /* | |
3253 | + * Remove unavailable and faulty devices ... | |
3254 | + * | |
3255 | + * note that if an array becomes completely unrunnable due to | |
3256 | + * missing devices, we do not write the superblock back, so the | |
3257 | + * administrator has a chance to fix things up. The removal thus | |
3258 | + * only happens if it's nonfatal to the contents of the array. | |
3259 | + */ | |
3260 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
3261 | + int found; | |
3262 | + mdp_disk_t *desc; | |
3263 | + kdev_t dev; | |
3264 | + | |
3265 | + desc = sb->disks + i; | |
3266 | + dev = MKDEV(desc->major, desc->minor); | |
3267 | + | |
3268 | + /* | |
3269 | + * We kick faulty devices/descriptors immediately. | |
3270 | + */ | |
3271 | + if (disk_faulty(desc)) { | |
3272 | + found = 0; | |
3273 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3274 | + if (rdev->desc_nr != desc->number) | |
3275 | + continue; | |
3276 | + printk("md%d: kicking faulty %s!\n", | |
3277 | + mdidx(mddev),partition_name(rdev->dev)); | |
3278 | + kick_rdev_from_array(rdev); | |
3279 | + found = 1; | |
3280 | + break; | |
3281 | + } | |
3282 | + if (!found) { | |
3283 | + if (dev == MKDEV(0,0)) | |
3284 | + continue; | |
3285 | + printk("md%d: removing former faulty %s!\n", | |
3286 | + mdidx(mddev), partition_name(dev)); | |
3287 | + } | |
3288 | + remove_descriptor(desc, sb); | |
3289 | + continue; | |
3290 | + } | |
3291 | + | |
3292 | + if (dev == MKDEV(0,0)) | |
3293 | + continue; | |
3294 | + /* | |
3295 | + * Is this device present in the rdev ring? | |
3296 | + */ | |
3297 | + found = 0; | |
3298 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3299 | + if (rdev->desc_nr == desc->number) { | |
3300 | + found = 1; | |
3301 | + break; | |
3302 | + } | |
3303 | + } | |
3304 | + if (found) | |
3305 | + continue; | |
3306 | + | |
3307 | + printk("md%d: former device %s is unavailable, removing from array!\n", mdidx(mddev), partition_name(dev)); | |
3308 | + remove_descriptor(desc, sb); | |
3309 | + } | |
3310 | + | |
3311 | + /* | |
3312 | + * Double check whether all devices mentioned in the | |
3313 | + * superblock are in the rdev ring. | |
3314 | + */ | |
3315 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
3316 | + mdp_disk_t *desc; | |
3317 | + kdev_t dev; | |
3318 | + | |
3319 | + desc = sb->disks + i; | |
3320 | + dev = MKDEV(desc->major, desc->minor); | |
3321 | + | |
3322 | + if (dev == MKDEV(0,0)) | |
3323 | + continue; | |
3324 | + | |
3325 | + if (disk_faulty(desc)) { | |
3326 | + MD_BUG(); | |
3327 | + goto abort; | |
3328 | + } | |
3329 | + | |
3330 | + rdev = find_rdev(mddev, dev); | |
3331 | + if (!rdev) { | |
3332 | + MD_BUG(); | |
3333 | + goto abort; | |
3334 | + } | |
3335 | + } | |
3336 | + | |
3337 | + /* | |
3338 | + * Do a final reality check. | |
3339 | + */ | |
3340 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3341 | + if (rdev->desc_nr == -1) { | |
3342 | + MD_BUG(); | |
3343 | + goto abort; | |
3344 | + } | |
3345 | + /* | |
3346 | + * is the desc_nr unique? | |
3347 | + */ | |
3348 | + ITERATE_RDEV(mddev,rdev2,tmp2) { | |
3349 | + if ((rdev2 != rdev) && | |
3350 | + (rdev2->desc_nr == rdev->desc_nr)) { | |
3351 | + MD_BUG(); | |
3352 | + goto abort; | |
3353 | + } | |
3354 | + } | |
3355 | + /* | |
3356 | + * is the device unique? | |
3357 | + */ | |
3358 | + ITERATE_RDEV(mddev,rdev2,tmp2) { | |
3359 | + if ((rdev2 != rdev) && | |
3360 | + (rdev2->dev == rdev->dev)) { | |
3361 | + MD_BUG(); | |
3362 | + goto abort; | |
3363 | + } | |
3364 | + } | |
3365 | + } | |
3366 | + | |
3367 | + /* | |
3368 | + * Check if we can support this RAID array | |
3369 | + */ | |
3370 | + if (sb->major_version != MD_MAJOR_VERSION || | |
3371 | + sb->minor_version > MD_MINOR_VERSION) { | |
3372 | + | |
3373 | + printk (OLD_VERSION, mdidx(mddev), sb->major_version, | |
3374 | + sb->minor_version, sb->patch_version); | |
3375 | + goto abort; | |
3376 | + } | |
3377 | + | |
3378 | + if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) || | |
3379 | + (sb->level == 4) || (sb->level == 5))) | |
3380 | + printk (NOT_CLEAN_IGNORE, mdidx(mddev)); | |
3381 | + | |
3382 | + return 0; | |
3383 | +abort: | |
3384 | + return 1; | |
3385 | +} | |
3386 | + | |
3387 | +#undef INCONSISTENT | |
3388 | +#undef OUT_OF_DATE | |
3389 | +#undef OLD_VERSION | |
3390 | +#undef OLD_LEVEL | |
3391 | + | |
3392 | +static int device_size_calculation (mddev_t * mddev) | |
3393 | +{ | |
3394 | + int data_disks = 0, persistent; | |
3395 | + unsigned int readahead; | |
3396 | + mdp_super_t *sb = mddev->sb; | |
3397 | + struct md_list_head *tmp; | |
3398 | + mdk_rdev_t *rdev; | |
3399 | + | |
3400 | + /* | |
3401 | + * Do device size calculation. Bail out if too small. | |
3402 | + * (we have to do this after having validated chunk_size, | |
3403 | + * because device size has to be modulo chunk_size) | |
3404 | + */ | |
3405 | + persistent = !mddev->sb->not_persistent; | |
3406 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3407 | + if (rdev->faulty) | |
3408 | + continue; | |
3409 | + if (rdev->size) { | |
3410 | + MD_BUG(); | |
3411 | + continue; | |
3412 | + } | |
3413 | + rdev->size = calc_dev_size(rdev->dev, mddev, persistent); | |
3414 | + if (rdev->size < sb->chunk_size / 1024) { | |
3415 | + printk (KERN_WARNING | |
3416 | + "Dev %s smaller than chunk_size: %dk < %dk\n", | |
3417 | + partition_name(rdev->dev), | |
3418 | + rdev->size, sb->chunk_size / 1024); | |
3419 | + return -EINVAL; | |
3420 | + } | |
3421 | + } | |
3422 | + | |
3423 | + switch (sb->level) { | |
3424 | + case -3: | |
3425 | + data_disks = 1; | |
3426 | + break; | |
3427 | + case -2: | |
3428 | + data_disks = 1; | |
3429 | + break; | |
3430 | + case -1: | |
3431 | + zoned_raid_size(mddev); | |
3432 | + data_disks = 1; | |
3433 | + break; | |
3434 | + case 0: | |
3435 | + zoned_raid_size(mddev); | |
3436 | + data_disks = sb->raid_disks; | |
3437 | + break; | |
3438 | + case 1: | |
3439 | + data_disks = 1; | |
3440 | + break; | |
3441 | + case 4: | |
3442 | + case 5: | |
3443 | + data_disks = sb->raid_disks-1; | |
3444 | + break; | |
3445 | + default: | |
3446 | + printk (UNKNOWN_LEVEL, mdidx(mddev), sb->level); | |
3447 | + goto abort; | |
3448 | + } | |
3449 | + if (!md_size[mdidx(mddev)]) | |
3450 | + md_size[mdidx(mddev)] = sb->size * data_disks; | |
3451 | + | |
3452 | + readahead = MD_READAHEAD; | |
3453 | + if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) | |
3454 | + readahead = mddev->sb->chunk_size * 4 * data_disks; | |
3455 | + if (readahead < data_disks * MAX_SECTORS*512*2) | |
3456 | + readahead = data_disks * MAX_SECTORS*512*2; | |
3457 | + else { | |
3458 | + if (sb->level == -3) | |
3459 | + readahead = 0; | |
3460 | + } | |
3461 | + md_maxreadahead[mdidx(mddev)] = readahead; | |
3462 | + | |
3463 | + printk(KERN_INFO "md%d: max total readahead window set to %dk\n", | |
3464 | + mdidx(mddev), readahead/1024); | |
3465 | + | |
3466 | + printk(KERN_INFO | |
3467 | + "md%d: %d data-disks, max readahead per data-disk: %dk\n", | |
3468 | + mdidx(mddev), data_disks, readahead/data_disks/1024); | |
3469 | + return 0; | |
3470 | +abort: | |
3471 | + return 1; | |
3472 | +} | |
3473 | + | |
3474 | + | |
3475 | +#define TOO_BIG_CHUNKSIZE KERN_ERR \ | |
3476 | +"too big chunk_size: %d > %d\n" | |
3477 | + | |
3478 | +#define TOO_SMALL_CHUNKSIZE KERN_ERR \ | |
3479 | +"too small chunk_size: %d < %ld\n" | |
3480 | + | |
3481 | +#define BAD_CHUNKSIZE KERN_ERR \ | |
3482 | +"no chunksize specified, see 'man raidtab'\n" | |
3483 | + | |
3484 | +static int do_md_run (mddev_t * mddev) | |
3485 | +{ | |
3486 | + int pnum, err; | |
3487 | + int chunk_size; | |
3488 | + struct md_list_head *tmp; | |
3489 | + mdk_rdev_t *rdev; | |
3490 | + | |
3491 | + | |
3492 | + if (!mddev->nb_dev) { | |
3493 | + MD_BUG(); | |
3494 | + return -EINVAL; | |
3495 | + } | |
3496 | + | |
3497 | + if (mddev->pers) | |
3498 | + return -EBUSY; | |
3499 | + | |
3500 | + /* | |
3501 | + * Resize disks to align partitions size on a given | |
3502 | + * chunk size. | |
3503 | + */ | |
3504 | + md_size[mdidx(mddev)] = 0; | |
3505 | + | |
3506 | + /* | |
3507 | + * Analyze all RAID superblock(s) | |
3508 | + */ | |
3509 | + if (analyze_sbs(mddev)) { | |
3510 | + MD_BUG(); | |
3511 | + return -EINVAL; | |
3512 | + } | |
3513 | + | |
3514 | + chunk_size = mddev->sb->chunk_size; | |
3515 | + pnum = level_to_pers(mddev->sb->level); | |
3516 | + | |
3517 | + mddev->param.chunk_size = chunk_size; | |
3518 | + mddev->param.personality = pnum; | |
3519 | + | |
3520 | + if (chunk_size > MAX_CHUNK_SIZE) { | |
3521 | + printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE); | |
3522 | + return -EINVAL; | |
3523 | + } | |
3524 | + /* | |
3525 | + * chunk-size has to be a power of 2 and multiples of PAGE_SIZE | |
3526 | + */ | |
3527 | + if ( (1 << ffz(~chunk_size)) != chunk_size) { | |
3528 | + MD_BUG(); | |
3529 | + return -EINVAL; | |
3530 | + } | |
3531 | + if (chunk_size < PAGE_SIZE) { | |
3532 | + printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE); | |
3533 | + return -EINVAL; | |
3534 | + } | |
3535 | + | |
3536 | + if (pnum >= MAX_PERSONALITY) { | |
3537 | + MD_BUG(); | |
3538 | + return -EINVAL; | |
3539 | + } | |
3540 | + | |
3541 | + if ((pnum != RAID1) && (pnum != LINEAR) && !chunk_size) { | |
3542 | + /* | |
3543 | + * 'default chunksize' in the old md code used to | |
3544 | + * be PAGE_SIZE, baaad. | |
3545 | + * we abort here to be on the safe side. We don't | |
3546 | + * want to continue the bad practice. | |
3547 | + */ | |
3548 | + printk(BAD_CHUNKSIZE); | |
3549 | + return -EINVAL; | |
3550 | + } | |
3551 | + | |
3552 | + if (!pers[pnum]) | |
3553 | + { | |
3554 | +#ifdef CONFIG_KMOD | |
3555 | + char module_name[80]; | |
3556 | + sprintf (module_name, "md-personality-%d", pnum); | |
3557 | + request_module (module_name); | |
3558 | + if (!pers[pnum]) | |
3559 | +#endif | |
3560 | + return -EINVAL; | |
3561 | + } | |
3562 | + | |
3563 | + if (device_size_calculation(mddev)) | |
3564 | + return -EINVAL; | |
3565 | + | |
3566 | + /* | |
3567 | + * Drop all container device buffers, from now on | |
3568 | + * the only valid external interface is through the md | |
3569 | + * device. | |
3570 | + */ | |
3571 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3572 | + if (rdev->faulty) | |
3573 | + continue; | |
3574 | + fsync_dev(rdev->dev); | |
3575 | + invalidate_buffers(rdev->dev); | |
3576 | + } | |
3577 | + | |
3578 | + mddev->pers = pers[pnum]; | |
3579 | + | |
3580 | + err = mddev->pers->run(mddev); | |
3581 | + if (err) { | |
3582 | + printk("pers->run() failed ...\n"); | |
3583 | + mddev->pers = NULL; | |
3584 | + return -EINVAL; | |
3585 | + } | |
3586 | + | |
3587 | + mddev->sb->state &= ~(1 << MD_SB_CLEAN); | |
3588 | + md_update_sb(mddev); | |
3589 | + | |
3590 | + /* | |
3591 | + * md_size has units of 1K blocks, which are | |
3592 | + * twice as large as sectors. | |
3593 | + */ | |
3594 | + md_hd_struct[mdidx(mddev)].start_sect = 0; | |
3595 | + md_hd_struct[mdidx(mddev)].nr_sects = md_size[mdidx(mddev)] << 1; | |
3596 | + | |
3597 | + read_ahead[MD_MAJOR] = 1024; | |
3598 | + return (0); | |
3599 | +} | |
3600 | + | |
3601 | +#undef TOO_BIG_CHUNKSIZE | |
3602 | +#undef BAD_CHUNKSIZE | |
3603 | + | |
3604 | +#define OUT(x) do { err = (x); goto out; } while (0) | |
3605 | + | |
3606 | +static int restart_array (mddev_t *mddev) | |
3607 | +{ | |
3608 | + int err = 0; | |
3609 | + | |
3610 | + /* | |
3611 | + * Complain if it has no devices | |
3612 | + */ | |
3613 | + if (!mddev->nb_dev) | |
3614 | + OUT(-ENXIO); | |
3615 | + | |
3616 | + if (mddev->pers) { | |
3617 | + if (!mddev->ro) | |
3618 | + OUT(-EBUSY); | |
3619 | + | |
3620 | + mddev->ro = 0; | |
3621 | + set_device_ro(mddev_to_kdev(mddev), 0); | |
3622 | + | |
3623 | + printk (KERN_INFO | |
3624 | + "md%d switched to read-write mode.\n", mdidx(mddev)); | |
3625 | + /* | |
3626 | + * Kick recovery or resync if necessary | |
3627 | + */ | |
3628 | + md_recover_arrays(); | |
3629 | + if (mddev->pers->restart_resync) | |
3630 | + mddev->pers->restart_resync(mddev); | |
3631 | + } else | |
3632 | + err = -EINVAL; | |
3633 | + | |
3634 | +out: | |
3635 | + return err; | |
3636 | +} | |
3637 | + | |
3638 | +#define STILL_MOUNTED KERN_WARNING \ | |
3639 | +"md: md%d still mounted.\n" | |
3640 | + | |
3641 | +static int do_md_stop (mddev_t * mddev, int ro) | |
3642 | +{ | |
3643 | + int err = 0, resync_interrupted = 0; | |
3644 | + kdev_t dev = mddev_to_kdev(mddev); | |
3645 | + | |
3646 | + if (!ro && !fs_may_mount (dev)) { | |
3647 | + printk (STILL_MOUNTED, mdidx(mddev)); | |
3648 | + OUT(-EBUSY); | |
3649 | + } | |
3650 | + | |
3651 | + /* | |
3652 | + * complain if it's already stopped | |
3653 | + */ | |
3654 | + if (!mddev->nb_dev) | |
3655 | + OUT(-ENXIO); | |
3656 | + | |
3657 | + if (mddev->pers) { | |
3658 | + /* | |
3659 | + * It is safe to call stop here, it only frees private | |
3660 | + * data. Also, it tells us if a device is unstoppable | |
3661 | + * (eg. resyncing is in progress) | |
3662 | + */ | |
3663 | + if (mddev->pers->stop_resync) | |
3664 | + if (mddev->pers->stop_resync(mddev)) | |
3665 | + resync_interrupted = 1; | |
3666 | + | |
3667 | + if (mddev->recovery_running) | |
3668 | + md_interrupt_thread(md_recovery_thread); | |
3669 | + | |
3670 | + /* | |
3671 | + * This synchronizes with signal delivery to the | |
3672 | + * resync or reconstruction thread. It also nicely | |
3673 | + * hangs the process if some reconstruction has not | |
3674 | + * finished. | |
3675 | + */ | |
3676 | + down(&mddev->recovery_sem); | |
3677 | + up(&mddev->recovery_sem); | |
3678 | + | |
3679 | + /* | |
3680 | + * sync and invalidate buffers because we cannot kill the | |
3681 | + * main thread with valid IO transfers still around. | |
3682 | + * the kernel lock protects us from new requests being | |
3683 | + * added after invalidate_buffers(). | |
3684 | + */ | |
3685 | + fsync_dev (mddev_to_kdev(mddev)); | |
3686 | + fsync_dev (dev); | |
3687 | + invalidate_buffers (dev); | |
3688 | + | |
3689 | + if (ro) { | |
3690 | + if (mddev->ro) | |
3691 | + OUT(-ENXIO); | |
3692 | + mddev->ro = 1; | |
3693 | + } else { | |
3694 | + if (mddev->ro) | |
3695 | + set_device_ro(dev, 0); | |
3696 | + if (mddev->pers->stop(mddev)) { | |
3697 | + if (mddev->ro) | |
3698 | + set_device_ro(dev, 1); | |
3699 | + OUT(-EBUSY); | |
3700 | + } | |
3701 | + if (mddev->ro) | |
3702 | + mddev->ro = 0; | |
3703 | + } | |
3704 | + if (mddev->sb) { | |
3705 | + /* | |
3706 | + * mark it clean only if there was no resync | |
3707 | + * interrupted. | |
3708 | + */ | |
3709 | + if (!mddev->recovery_running && !resync_interrupted) { | |
3710 | + printk("marking sb clean...\n"); | |
3711 | + mddev->sb->state |= 1 << MD_SB_CLEAN; | |
3712 | + } | |
3713 | + md_update_sb(mddev); | |
3714 | + } | |
3715 | + if (ro) | |
3716 | + set_device_ro(dev, 1); | |
3717 | + } | |
3718 | + | |
3719 | + /* | |
3720 | + * Free resources if final stop | |
3721 | + */ | |
3722 | + if (!ro) { | |
3723 | + export_array(mddev); | |
3724 | + md_size[mdidx(mddev)] = 0; | |
3725 | + md_hd_struct[mdidx(mddev)].nr_sects = 0; | |
3726 | + free_mddev(mddev); | |
3727 | + | |
3728 | + printk (KERN_INFO "md%d stopped.\n", mdidx(mddev)); | |
3729 | + } else | |
3730 | + printk (KERN_INFO | |
3731 | + "md%d switched to read-only mode.\n", mdidx(mddev)); | |
3732 | +out: | |
3733 | + return err; | |
3734 | +} | |
3735 | + | |
3736 | +#undef OUT | |
3737 | + | |
3738 | +/* | |
3739 | + * We have to safely support old arrays too. | |
3740 | + */ | |
3741 | +int detect_old_array (mdp_super_t *sb) | |
3742 | +{ | |
3743 | + if (sb->major_version > 0) | |
3744 | + return 0; | |
3745 | + if (sb->minor_version >= 90) | |
3746 | + return 0; | |
3747 | + | |
3748 | + return -EINVAL; | |
3749 | +} | |
3750 | + | |
3751 | + | |
3752 | +static void autorun_array (mddev_t *mddev) | |
3753 | +{ | |
3754 | + mdk_rdev_t *rdev; | |
3755 | + struct md_list_head *tmp; | |
3756 | + int err; | |
3757 | + | |
3758 | + if (mddev->disks.prev == &mddev->disks) { | |
3759 | + MD_BUG(); | |
3760 | + return; | |
3761 | + } | |
3762 | + | |
3763 | + printk("running: "); | |
3764 | + | |
3765 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
3766 | + printk("<%s>", partition_name(rdev->dev)); | |
3767 | + } | |
3768 | + printk("\nnow!\n"); | |
3769 | + | |
3770 | + err = do_md_run (mddev); | |
3771 | + if (err) { | |
3772 | + printk("do_md_run() returned %d\n", err); | |
3773 | + /* | |
3774 | + * prevent the writeback of an unrunnable array | |
3775 | + */ | |
3776 | + mddev->sb_dirty = 0; | |
3777 | + do_md_stop (mddev, 0); | |
3778 | + } | |
3779 | +} | |
3780 | + | |
3781 | +/* | |
3782 | + * lets try to run arrays based on all disks that have arrived | |
3783 | + * until now. (those are in the ->pending list) | |
3784 | + * | |
3785 | + * the method: pick the first pending disk, collect all disks with | |
3786 | + * the same UUID, remove all from the pending list and put them into | |
3787 | + * the 'same_array' list. Then order this list based on superblock | |
3788 | + * update time (freshest comes first), kick out 'old' disks and | |
3789 | + * compare superblocks. If everything's fine then run it. | |
3790 | + */ | |
3791 | +static void autorun_devices (void) | |
3792 | +{ | |
3793 | + struct md_list_head candidates; | |
3794 | + struct md_list_head *tmp; | |
3795 | + mdk_rdev_t *rdev0, *rdev; | |
3796 | + mddev_t *mddev; | |
3797 | + kdev_t md_kdev; | |
3798 | + | |
3799 | + | |
3800 | + printk("autorun ...\n"); | |
3801 | + while (pending_raid_disks.next != &pending_raid_disks) { | |
3802 | + rdev0 = md_list_entry(pending_raid_disks.next, | |
3803 | + mdk_rdev_t, pending); | |
3804 | + | |
3805 | + printk("considering %s ...\n", partition_name(rdev0->dev)); | |
3806 | + MD_INIT_LIST_HEAD(&candidates); | |
3807 | + ITERATE_RDEV_PENDING(rdev,tmp) { | |
3808 | + if (uuid_equal(rdev0, rdev)) { | |
3809 | + if (!sb_equal(rdev0->sb, rdev->sb)) { | |
3810 | + printk("%s has same UUID as %s, but superblocks differ ...\n", partition_name(rdev->dev), partition_name(rdev0->dev)); | |
3811 | + continue; | |
3812 | + } | |
3813 | + printk(" adding %s ...\n", partition_name(rdev->dev)); | |
3814 | + md_list_del(&rdev->pending); | |
3815 | + md_list_add(&rdev->pending, &candidates); | |
3816 | + } | |
3817 | + } | |
3818 | + /* | |
3819 | + * now we have a set of devices, with all of them having | |
3820 | + * mostly sane superblocks. It's time to allocate the | |
3821 | + * mddev. | |
3822 | */ | |
3823 | - if (analyze_one_sb (realdev)) | |
3824 | + md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor); | |
3825 | + mddev = kdev_to_mddev(md_kdev); | |
3826 | + if (mddev) { | |
3827 | + printk("md%d already running, cannot run %s\n", | |
3828 | + mdidx(mddev), partition_name(rdev0->dev)); | |
3829 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) | |
3830 | + export_rdev(rdev); | |
3831 | + continue; | |
3832 | + } | |
3833 | + mddev = alloc_mddev(md_kdev); | |
3834 | + printk("created md%d\n", mdidx(mddev)); | |
3835 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { | |
3836 | + bind_rdev_to_array(rdev, mddev); | |
3837 | + md_list_del(&rdev->pending); | |
3838 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
3839 | + } | |
3840 | + autorun_array(mddev); | |
3841 | + } | |
3842 | + printk("... autorun DONE.\n"); | |
3843 | +} | |
3844 | + | |
3845 | +/* | |
3846 | + * import RAID devices based on one partition | |
3847 | + * if possible, the array gets run as well. | |
3848 | + */ | |
3849 | + | |
3850 | +#define BAD_VERSION KERN_ERR \ | |
3851 | +"md: %s has RAID superblock version 0.%d, autodetect needs v0.90 or higher\n" | |
3852 | + | |
3853 | +#define OUT_OF_MEM KERN_ALERT \ | |
3854 | +"md: out of memory.\n" | |
3855 | + | |
3856 | +#define NO_DEVICE KERN_ERR \ | |
3857 | +"md: disabled device %s\n" | |
3858 | + | |
3859 | +#define AUTOADD_FAILED KERN_ERR \ | |
3860 | +"md: auto-adding devices to md%d FAILED (error %d).\n" | |
3861 | + | |
3862 | +#define AUTOADD_FAILED_USED KERN_ERR \ | |
3863 | +"md: cannot auto-add device %s to md%d, already used.\n" | |
3864 | + | |
3865 | +#define AUTORUN_FAILED KERN_ERR \ | |
3866 | +"md: auto-running md%d FAILED (error %d).\n" | |
3867 | + | |
3868 | +#define MDDEV_BUSY KERN_ERR \ | |
3869 | +"md: cannot auto-add to md%d, already running.\n" | |
3870 | + | |
3871 | +#define AUTOADDING KERN_INFO \ | |
3872 | +"md: auto-adding devices to md%d, based on %s's superblock.\n" | |
3873 | + | |
3874 | +#define AUTORUNNING KERN_INFO \ | |
3875 | +"md: auto-running md%d.\n" | |
3876 | + | |
3877 | +static int autostart_array (kdev_t startdev) | |
3878 | +{ | |
3879 | + int err = -EINVAL, i; | |
3880 | + mdp_super_t *sb = NULL; | |
3881 | + mdk_rdev_t *start_rdev = NULL, *rdev; | |
3882 | + | |
3883 | + if (md_import_device(startdev, 1)) { | |
3884 | + printk("could not import %s!\n", partition_name(startdev)); | |
3885 | + goto abort; | |
3886 | + } | |
3887 | + | |
3888 | + start_rdev = find_rdev_all(startdev); | |
3889 | + if (!start_rdev) { | |
3890 | + MD_BUG(); | |
3891 | + goto abort; | |
3892 | + } | |
3893 | + if (start_rdev->faulty) { | |
3894 | + printk("can not autostart based on faulty %s!\n", | |
3895 | + partition_name(startdev)); | |
3896 | + goto abort; | |
3897 | + } | |
3898 | + md_list_add(&start_rdev->pending, &pending_raid_disks); | |
3899 | + | |
3900 | + sb = start_rdev->sb; | |
3901 | + | |
3902 | + err = detect_old_array(sb); | |
3903 | + if (err) { | |
3904 | + printk("array version is too old to be autostarted, use raidtools 0.90 mkraid --upgrade\nto upgrade the array without data loss!\n"); | |
3905 | + goto abort; | |
3906 | + } | |
3907 | + | |
3908 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
3909 | + mdp_disk_t *desc; | |
3910 | + kdev_t dev; | |
3911 | + | |
3912 | + desc = sb->disks + i; | |
3913 | + dev = MKDEV(desc->major, desc->minor); | |
3914 | + | |
3915 | + if (dev == MKDEV(0,0)) | |
3916 | + continue; | |
3917 | + if (dev == startdev) | |
3918 | + continue; | |
3919 | + if (md_import_device(dev, 1)) { | |
3920 | + printk("could not import %s, trying to run array nevertheless.\n", partition_name(dev)); | |
3921 | + continue; | |
3922 | + } | |
3923 | + rdev = find_rdev_all(dev); | |
3924 | + if (!rdev) { | |
3925 | + MD_BUG(); | |
3926 | + goto abort; | |
3927 | + } | |
3928 | + md_list_add(&rdev->pending, &pending_raid_disks); | |
3929 | + } | |
3930 | + | |
3931 | + /* | |
3932 | + * possibly return codes | |
3933 | + */ | |
3934 | + autorun_devices(); | |
3935 | + return 0; | |
3936 | + | |
3937 | +abort: | |
3938 | + if (start_rdev) | |
3939 | + export_rdev(start_rdev); | |
3940 | + return err; | |
3941 | +} | |
3942 | + | |
3943 | +#undef BAD_VERSION | |
3944 | +#undef OUT_OF_MEM | |
3945 | +#undef NO_DEVICE | |
3946 | +#undef AUTOADD_FAILED_USED | |
3947 | +#undef AUTOADD_FAILED | |
3948 | +#undef AUTORUN_FAILED | |
3949 | +#undef AUTOADDING | |
3950 | +#undef AUTORUNNING | |
3951 | + | |
3952 | +struct { | |
3953 | + int set; | |
3954 | + int noautodetect; | |
3955 | + | |
3956 | +} raid_setup_args md__initdata = { 0, 0 }; | |
3957 | + | |
3958 | +/* | |
3959 | + * Searches all registered partitions for autorun RAID arrays | |
3960 | + * at boot time. | |
3961 | + */ | |
3962 | +md__initfunc(void autodetect_raid(void)) | |
3963 | +{ | |
3964 | +#ifdef CONFIG_AUTODETECT_RAID | |
3965 | + struct gendisk *disk; | |
3966 | + mdk_rdev_t *rdev; | |
3967 | + int i; | |
3968 | + | |
3969 | + if (raid_setup_args.noautodetect) { | |
3970 | + printk(KERN_INFO "skipping autodetection of RAID arrays\n"); | |
3971 | + return; | |
3972 | + } | |
3973 | + printk(KERN_INFO "autodetecting RAID arrays\n"); | |
3974 | + | |
3975 | + for (disk = gendisk_head ; disk ; disk = disk->next) { | |
3976 | + for (i = 0; i < disk->max_p*disk->max_nr; i++) { | |
3977 | + kdev_t dev = MKDEV(disk->major,i); | |
3978 | + | |
3979 | + if (disk->part[i].type == LINUX_OLD_RAID_PARTITION) { | |
3980 | + printk(KERN_ALERT | |
3981 | +"md: %s's partition type has to be changed from type 0x86 to type 0xfd\n" | |
3982 | +" to maintain interoperability with other OSs! Autodetection support for\n" | |
3983 | +" type 0x86 will be deleted after some migration timeout. Sorry.\n", | |
3984 | + partition_name(dev)); | |
3985 | + disk->part[i].type = LINUX_RAID_PARTITION; | |
3986 | + } | |
3987 | + if (disk->part[i].type != LINUX_RAID_PARTITION) | |
3988 | + continue; | |
3989 | + | |
3990 | + if (md_import_device(dev,1)) { | |
3991 | + printk(KERN_ALERT "could not import %s!\n", | |
3992 | + partition_name(dev)); | |
3993 | + continue; | |
3994 | + } | |
3995 | + /* | |
3996 | + * Sanity checks: | |
3997 | + */ | |
3998 | + rdev = find_rdev_all(dev); | |
3999 | + if (!rdev) { | |
4000 | + MD_BUG(); | |
4001 | + continue; | |
4002 | + } | |
4003 | + if (rdev->faulty) { | |
4004 | + MD_BUG(); | |
4005 | + continue; | |
4006 | + } | |
4007 | + md_list_add(&rdev->pending, &pending_raid_disks); | |
4008 | + } | |
4009 | + } | |
4010 | + | |
4011 | + autorun_devices(); | |
4012 | +#endif | |
4013 | +} | |
4014 | + | |
4015 | +static int get_version (void * arg) | |
4016 | +{ | |
4017 | + mdu_version_t ver; | |
4018 | + | |
4019 | + ver.major = MD_MAJOR_VERSION; | |
4020 | + ver.minor = MD_MINOR_VERSION; | |
4021 | + ver.patchlevel = MD_PATCHLEVEL_VERSION; | |
4022 | + | |
4023 | + if (md_copy_to_user(arg, &ver, sizeof(ver))) | |
4024 | + return -EFAULT; | |
4025 | + | |
4026 | + return 0; | |
4027 | +} | |
4028 | + | |
4029 | +#define SET_FROM_SB(x) info.x = mddev->sb->x | |
4030 | +static int get_array_info (mddev_t * mddev, void * arg) | |
4031 | +{ | |
4032 | + mdu_array_info_t info; | |
4033 | + | |
4034 | + if (!mddev->sb) | |
4035 | + return -EINVAL; | |
4036 | + | |
4037 | + SET_FROM_SB(major_version); | |
4038 | + SET_FROM_SB(minor_version); | |
4039 | + SET_FROM_SB(patch_version); | |
4040 | + SET_FROM_SB(ctime); | |
4041 | + SET_FROM_SB(level); | |
4042 | + SET_FROM_SB(size); | |
4043 | + SET_FROM_SB(nr_disks); | |
4044 | + SET_FROM_SB(raid_disks); | |
4045 | + SET_FROM_SB(md_minor); | |
4046 | + SET_FROM_SB(not_persistent); | |
4047 | + | |
4048 | + SET_FROM_SB(utime); | |
4049 | + SET_FROM_SB(state); | |
4050 | + SET_FROM_SB(active_disks); | |
4051 | + SET_FROM_SB(working_disks); | |
4052 | + SET_FROM_SB(failed_disks); | |
4053 | + SET_FROM_SB(spare_disks); | |
4054 | + | |
4055 | + SET_FROM_SB(layout); | |
4056 | + SET_FROM_SB(chunk_size); | |
4057 | + | |
4058 | + if (md_copy_to_user(arg, &info, sizeof(info))) | |
4059 | + return -EFAULT; | |
4060 | + | |
4061 | + return 0; | |
4062 | +} | |
4063 | +#undef SET_FROM_SB | |
4064 | + | |
4065 | +#define SET_FROM_SB(x) info.x = mddev->sb->disks[nr].x | |
4066 | +static int get_disk_info (mddev_t * mddev, void * arg) | |
4067 | +{ | |
4068 | + mdu_disk_info_t info; | |
4069 | + unsigned int nr; | |
4070 | + | |
4071 | + if (!mddev->sb) | |
4072 | + return -EINVAL; | |
4073 | + | |
4074 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
4075 | + return -EFAULT; | |
4076 | + | |
4077 | + nr = info.number; | |
4078 | + if (nr >= mddev->sb->nr_disks) | |
4079 | + return -EINVAL; | |
4080 | + | |
4081 | + SET_FROM_SB(major); | |
4082 | + SET_FROM_SB(minor); | |
4083 | + SET_FROM_SB(raid_disk); | |
4084 | + SET_FROM_SB(state); | |
4085 | + | |
4086 | + if (md_copy_to_user(arg, &info, sizeof(info))) | |
4087 | + return -EFAULT; | |
4088 | + | |
4089 | + return 0; | |
4090 | +} | |
4091 | +#undef SET_FROM_SB | |
4092 | + | |
4093 | +#define SET_SB(x) mddev->sb->disks[nr].x = info.x | |
4094 | + | |
4095 | +static int add_new_disk (mddev_t * mddev, void * arg) | |
4096 | +{ | |
4097 | + int err, size, persistent; | |
4098 | + mdu_disk_info_t info; | |
4099 | + mdk_rdev_t *rdev; | |
4100 | + unsigned int nr; | |
4101 | + kdev_t dev; | |
4102 | + | |
4103 | + if (!mddev->sb) | |
4104 | + return -EINVAL; | |
4105 | + | |
4106 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
4107 | + return -EFAULT; | |
4108 | + | |
4109 | + nr = info.number; | |
4110 | + if (nr >= mddev->sb->nr_disks) | |
4111 | + return -EINVAL; | |
4112 | + | |
4113 | + dev = MKDEV(info.major,info.minor); | |
4114 | + | |
4115 | + if (find_rdev_all(dev)) { | |
4116 | + printk("device %s already used in a RAID array!\n", | |
4117 | + partition_name(dev)); | |
4118 | + return -EBUSY; | |
4119 | + } | |
4120 | + | |
4121 | + SET_SB(number); | |
4122 | + SET_SB(major); | |
4123 | + SET_SB(minor); | |
4124 | + SET_SB(raid_disk); | |
4125 | + SET_SB(state); | |
4126 | + | |
4127 | + if ((info.state & (1<<MD_DISK_FAULTY))==0) { | |
4128 | + err = md_import_device (dev, 0); | |
4129 | + if (err) { | |
4130 | + printk("md: error, md_import_device() returned %d\n", err); | |
4131 | + return -EINVAL; | |
4132 | + } | |
4133 | + rdev = find_rdev_all(dev); | |
4134 | + if (!rdev) { | |
4135 | + MD_BUG(); | |
4136 | return -EINVAL; | |
4137 | + } | |
4138 | + | |
4139 | + rdev->old_dev = dev; | |
4140 | + rdev->desc_nr = info.number; | |
4141 | + | |
4142 | + bind_rdev_to_array(rdev, mddev); | |
4143 | + | |
4144 | + persistent = !mddev->sb->not_persistent; | |
4145 | + if (!persistent) | |
4146 | + printk("nonpersistent superblock ...\n"); | |
4147 | + if (!mddev->sb->chunk_size) | |
4148 | + printk("no chunksize?\n"); | |
4149 | + | |
4150 | + size = calc_dev_size(dev, mddev, persistent); | |
4151 | + rdev->sb_offset = calc_dev_sboffset(dev, mddev, persistent); | |
4152 | + | |
4153 | + if (!mddev->sb->size || (mddev->sb->size > size)) | |
4154 | + mddev->sb->size = size; | |
4155 | + } | |
4156 | + | |
4157 | + /* | |
4158 | + * sync all other superblocks with the main superblock | |
4159 | + */ | |
4160 | + sync_sbs(mddev); | |
4161 | + | |
4162 | + return 0; | |
4163 | +} | |
4164 | +#undef SET_SB | |
4165 | + | |
4166 | +static int hot_remove_disk (mddev_t * mddev, kdev_t dev) | |
4167 | +{ | |
4168 | + int err; | |
4169 | + mdk_rdev_t *rdev; | |
4170 | + mdp_disk_t *disk; | |
4171 | + | |
4172 | + if (!mddev->pers) | |
4173 | + return -ENODEV; | |
4174 | + | |
4175 | + printk("trying to remove %s from md%d ... \n", | |
4176 | + partition_name(dev), mdidx(mddev)); | |
4177 | + | |
4178 | + if (!mddev->pers->diskop) { | |
4179 | + printk("md%d: personality does not support diskops!\n", | |
4180 | + mdidx(mddev)); | |
4181 | + return -EINVAL; | |
4182 | + } | |
4183 | + | |
4184 | + rdev = find_rdev(mddev, dev); | |
4185 | + if (!rdev) | |
4186 | + return -ENXIO; | |
4187 | + | |
4188 | + if (rdev->desc_nr == -1) { | |
4189 | + MD_BUG(); | |
4190 | + return -EINVAL; | |
4191 | + } | |
4192 | + disk = &mddev->sb->disks[rdev->desc_nr]; | |
4193 | + if (disk_active(disk)) | |
4194 | + goto busy; | |
4195 | + if (disk_removed(disk)) { | |
4196 | + MD_BUG(); | |
4197 | + return -EINVAL; | |
4198 | + } | |
4199 | + | |
4200 | + err = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK); | |
4201 | + if (err == -EBUSY) | |
4202 | + goto busy; | |
4203 | + if (err) { | |
4204 | + MD_BUG(); | |
4205 | + return -EINVAL; | |
4206 | + } | |
4207 | + | |
4208 | + remove_descriptor(disk, mddev->sb); | |
4209 | + kick_rdev_from_array(rdev); | |
4210 | + mddev->sb_dirty = 1; | |
4211 | + md_update_sb(mddev); | |
4212 | + | |
4213 | + return 0; | |
4214 | +busy: | |
4215 | + printk("cannot remove active disk %s from md%d ... \n", | |
4216 | + partition_name(dev), mdidx(mddev)); | |
4217 | + return -EBUSY; | |
4218 | +} | |
4219 | + | |
4220 | +static int hot_add_disk (mddev_t * mddev, kdev_t dev) | |
4221 | +{ | |
4222 | + int i, err, persistent; | |
4223 | + unsigned int size; | |
4224 | + mdk_rdev_t *rdev; | |
4225 | + mdp_disk_t *disk; | |
4226 | + | |
4227 | + if (!mddev->pers) | |
4228 | + return -ENODEV; | |
4229 | + | |
4230 | + printk("trying to hot-add %s to md%d ... \n", | |
4231 | + partition_name(dev), mdidx(mddev)); | |
4232 | + | |
4233 | + if (!mddev->pers->diskop) { | |
4234 | + printk("md%d: personality does not support diskops!\n", | |
4235 | + mdidx(mddev)); | |
4236 | + return -EINVAL; | |
4237 | + } | |
4238 | + | |
4239 | + persistent = !mddev->sb->not_persistent; | |
4240 | + size = calc_dev_size(dev, mddev, persistent); | |
4241 | + | |
4242 | + if (size < mddev->sb->size) { | |
4243 | + printk("md%d: disk size %d blocks < array size %d\n", | |
4244 | + mdidx(mddev), size, mddev->sb->size); | |
4245 | + return -ENOSPC; | |
4246 | + } | |
4247 | + | |
4248 | + rdev = find_rdev(mddev, dev); | |
4249 | + if (rdev) | |
4250 | + return -EBUSY; | |
4251 | + | |
4252 | + err = md_import_device (dev, 0); | |
4253 | + if (err) { | |
4254 | + printk("md: error, md_import_device() returned %d\n", err); | |
4255 | + return -EINVAL; | |
4256 | + } | |
4257 | + rdev = find_rdev_all(dev); | |
4258 | + if (!rdev) { | |
4259 | + MD_BUG(); | |
4260 | + return -EINVAL; | |
4261 | + } | |
4262 | + if (rdev->faulty) { | |
4263 | + printk("md: can not hot-add faulty %s disk to md%d!\n", | |
4264 | + partition_name(dev), mdidx(mddev)); | |
4265 | + err = -EINVAL; | |
4266 | + goto abort_export; | |
4267 | + } | |
4268 | + bind_rdev_to_array(rdev, mddev); | |
4269 | + | |
4270 | + /* | |
4271 | + * The rest should better be atomic, we can have disk failures | |
4272 | + * noticed in interrupt contexts ... | |
4273 | + */ | |
4274 | + cli(); | |
4275 | + rdev->old_dev = dev; | |
4276 | + rdev->size = size; | |
4277 | + rdev->sb_offset = calc_dev_sboffset(dev, mddev, persistent); | |
4278 | + | |
4279 | + disk = mddev->sb->disks + mddev->sb->raid_disks; | |
4280 | + for (i = mddev->sb->raid_disks; i < MD_SB_DISKS; i++) { | |
4281 | + disk = mddev->sb->disks + i; | |
4282 | + | |
4283 | + if (!disk->major && !disk->minor) | |
4284 | + break; | |
4285 | + if (disk_removed(disk)) | |
4286 | + break; | |
4287 | + } | |
4288 | + if (i == MD_SB_DISKS) { | |
4289 | + sti(); | |
4290 | + printk("md%d: can not hot-add to full array!\n", mdidx(mddev)); | |
4291 | + err = -EBUSY; | |
4292 | + goto abort_unbind_export; | |
4293 | + } | |
4294 | + | |
4295 | + if (disk_removed(disk)) { | |
4296 | /* | |
4297 | - * hot_add has to bump up nb_dev itself | |
4298 | + * reuse slot | |
4299 | */ | |
4300 | - if (md_dev[minor].pers->hot_add_disk (&md_dev[minor], dev)) { | |
4301 | - /* | |
4302 | - * FIXME: here we should free up the inode and stuff | |
4303 | - */ | |
4304 | - printk ("FIXME\n"); | |
4305 | - return -EINVAL; | |
4306 | + if (disk->number != i) { | |
4307 | + sti(); | |
4308 | + MD_BUG(); | |
4309 | + err = -EINVAL; | |
4310 | + goto abort_unbind_export; | |
4311 | } | |
4312 | - } else | |
4313 | - md_dev[minor].nb_dev++; | |
4314 | + } else { | |
4315 | + disk->number = i; | |
4316 | + } | |
4317 | ||
4318 | - printk ("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor); | |
4319 | - return (0); | |
4320 | + disk->raid_disk = disk->number; | |
4321 | + disk->major = MAJOR(dev); | |
4322 | + disk->minor = MINOR(dev); | |
4323 | + | |
4324 | + if (mddev->pers->diskop(mddev, &disk, DISKOP_HOT_ADD_DISK)) { | |
4325 | + sti(); | |
4326 | + MD_BUG(); | |
4327 | + err = -EINVAL; | |
4328 | + goto abort_unbind_export; | |
4329 | + } | |
4330 | + | |
4331 | + mark_disk_spare(disk); | |
4332 | + mddev->sb->nr_disks++; | |
4333 | + mddev->sb->spare_disks++; | |
4334 | + mddev->sb->working_disks++; | |
4335 | + | |
4336 | + mddev->sb_dirty = 1; | |
4337 | + | |
4338 | + sti(); | |
4339 | + md_update_sb(mddev); | |
4340 | + | |
4341 | + /* | |
4342 | + * Kick recovery, maybe this spare has to be added to the | |
4343 | + * array immediately. | |
4344 | + */ | |
4345 | + md_recover_arrays(); | |
4346 | + | |
4347 | + return 0; | |
4348 | + | |
4349 | +abort_unbind_export: | |
4350 | + unbind_rdev_from_array(rdev); | |
4351 | + | |
4352 | +abort_export: | |
4353 | + export_rdev(rdev); | |
4354 | + return err; | |
4355 | +} | |
4356 | + | |
4357 | +#define SET_SB(x) mddev->sb->x = info.x | |
4358 | +static int set_array_info (mddev_t * mddev, void * arg) | |
4359 | +{ | |
4360 | + mdu_array_info_t info; | |
4361 | + | |
4362 | + if (mddev->sb) { | |
4363 | + printk("array md%d already has a superblock!\n", | |
4364 | + mdidx(mddev)); | |
4365 | + return -EBUSY; | |
4366 | + } | |
4367 | + | |
4368 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
4369 | + return -EFAULT; | |
4370 | + | |
4371 | + if (alloc_array_sb(mddev)) | |
4372 | + return -ENOMEM; | |
4373 | + | |
4374 | + mddev->sb->major_version = MD_MAJOR_VERSION; | |
4375 | + mddev->sb->minor_version = MD_MINOR_VERSION; | |
4376 | + mddev->sb->patch_version = MD_PATCHLEVEL_VERSION; | |
4377 | + mddev->sb->ctime = CURRENT_TIME; | |
4378 | + | |
4379 | + SET_SB(level); | |
4380 | + SET_SB(size); | |
4381 | + SET_SB(nr_disks); | |
4382 | + SET_SB(raid_disks); | |
4383 | + SET_SB(md_minor); | |
4384 | + SET_SB(not_persistent); | |
4385 | + | |
4386 | + SET_SB(state); | |
4387 | + SET_SB(active_disks); | |
4388 | + SET_SB(working_disks); | |
4389 | + SET_SB(failed_disks); | |
4390 | + SET_SB(spare_disks); | |
4391 | + | |
4392 | + SET_SB(layout); | |
4393 | + SET_SB(chunk_size); | |
4394 | + | |
4395 | + mddev->sb->md_magic = MD_SB_MAGIC; | |
4396 | + | |
4397 | + /* | |
4398 | + * Generate a 128 bit UUID | |
4399 | + */ | |
4400 | + get_random_bytes(&mddev->sb->set_uuid0, 4); | |
4401 | + get_random_bytes(&mddev->sb->set_uuid1, 4); | |
4402 | + get_random_bytes(&mddev->sb->set_uuid2, 4); | |
4403 | + get_random_bytes(&mddev->sb->set_uuid3, 4); | |
4404 | + | |
4405 | + return 0; | |
4406 | +} | |
4407 | +#undef SET_SB | |
4408 | + | |
4409 | +static int set_disk_info (mddev_t * mddev, void * arg) | |
4410 | +{ | |
4411 | + printk("not yet"); | |
4412 | + return -EINVAL; | |
4413 | +} | |
4414 | + | |
4415 | +static int clear_array (mddev_t * mddev) | |
4416 | +{ | |
4417 | + printk("not yet"); | |
4418 | + return -EINVAL; | |
4419 | +} | |
4420 | + | |
4421 | +static int write_raid_info (mddev_t * mddev) | |
4422 | +{ | |
4423 | + printk("not yet"); | |
4424 | + return -EINVAL; | |
4425 | +} | |
4426 | + | |
4427 | +static int protect_array (mddev_t * mddev) | |
4428 | +{ | |
4429 | + printk("not yet"); | |
4430 | + return -EINVAL; | |
4431 | +} | |
4432 | + | |
4433 | +static int unprotect_array (mddev_t * mddev) | |
4434 | +{ | |
4435 | + printk("not yet"); | |
4436 | + return -EINVAL; | |
4437 | +} | |
4438 | + | |
4439 | +static int set_disk_faulty (mddev_t *mddev, kdev_t dev) | |
4440 | +{ | |
4441 | + int ret; | |
4442 | + | |
4443 | + fsync_dev(mddev_to_kdev(mddev)); | |
4444 | + ret = md_error(mddev_to_kdev(mddev), dev); | |
4445 | + return ret; | |
4446 | } | |
4447 | ||
4448 | static int md_ioctl (struct inode *inode, struct file *file, | |
4449 | unsigned int cmd, unsigned long arg) | |
4450 | { | |
4451 | - int minor, err; | |
4452 | - struct hd_geometry *loc = (struct hd_geometry *) arg; | |
4453 | + unsigned int minor; | |
4454 | + int err = 0; | |
4455 | + struct hd_geometry *loc = (struct hd_geometry *) arg; | |
4456 | + mddev_t *mddev = NULL; | |
4457 | + kdev_t dev; | |
4458 | ||
4459 | - if (!capable(CAP_SYS_ADMIN)) | |
4460 | - return -EACCES; | |
4461 | + if (!md_capable_admin()) | |
4462 | + return -EACCES; | |
4463 | ||
4464 | - if (((minor=MINOR(inode->i_rdev)) & 0x80) && | |
4465 | - (minor & 0x7f) < MAX_PERSONALITY && | |
4466 | - pers[minor & 0x7f] && | |
4467 | - pers[minor & 0x7f]->ioctl) | |
4468 | - return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg)); | |
4469 | - | |
4470 | - if (minor >= MAX_MD_DEV) | |
4471 | - return -EINVAL; | |
4472 | + dev = inode->i_rdev; | |
4473 | + minor = MINOR(dev); | |
4474 | + if (minor >= MAX_MD_DEVS) | |
4475 | + return -EINVAL; | |
4476 | ||
4477 | - switch (cmd) | |
4478 | - { | |
4479 | - case REGISTER_DEV: | |
4480 | - return do_md_add (minor, to_kdev_t ((dev_t) arg)); | |
4481 | + /* | |
4482 | + * Commands dealing with the RAID driver but not any | |
4483 | + * particular array: | |
4484 | + */ | |
4485 | + switch (cmd) | |
4486 | + { | |
4487 | + case RAID_VERSION: | |
4488 | + err = get_version((void *)arg); | |
4489 | + goto done; | |
4490 | + | |
4491 | + case PRINT_RAID_DEBUG: | |
4492 | + err = 0; | |
4493 | + md_print_devices(); | |
4494 | + goto done_unlock; | |
4495 | + | |
4496 | + case BLKGETSIZE: /* Return device size */ | |
4497 | + if (!arg) { | |
4498 | + err = -EINVAL; | |
4499 | + goto abort; | |
4500 | + } | |
4501 | + err = md_put_user(md_hd_struct[minor].nr_sects, | |
4502 | + (long *) arg); | |
4503 | + goto done; | |
4504 | ||
4505 | - case START_MD: | |
4506 | - return do_md_run (minor, (int) arg); | |
4507 | + case BLKFLSBUF: | |
4508 | + fsync_dev(dev); | |
4509 | + invalidate_buffers(dev); | |
4510 | + goto done; | |
4511 | ||
4512 | - case STOP_MD: | |
4513 | - return do_md_stop (minor, inode); | |
4514 | - | |
4515 | - case BLKGETSIZE: /* Return device size */ | |
4516 | - if (!arg) return -EINVAL; | |
4517 | - err = put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg); | |
4518 | - if (err) | |
4519 | - return err; | |
4520 | - break; | |
4521 | - | |
4522 | - case BLKFLSBUF: | |
4523 | - fsync_dev (inode->i_rdev); | |
4524 | - invalidate_buffers (inode->i_rdev); | |
4525 | - break; | |
4526 | - | |
4527 | - case BLKRASET: | |
4528 | - if (arg > 0xff) | |
4529 | - return -EINVAL; | |
4530 | - read_ahead[MAJOR(inode->i_rdev)] = arg; | |
4531 | - return 0; | |
4532 | - | |
4533 | - case BLKRAGET: | |
4534 | - if (!arg) return -EINVAL; | |
4535 | - err = put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg); | |
4536 | - if (err) | |
4537 | - return err; | |
4538 | - break; | |
4539 | - | |
4540 | - /* We have a problem here : there is no easy way to give a CHS | |
4541 | - virtual geometry. We currently pretend that we have a 2 heads | |
4542 | - 4 sectors (with a BIG number of cylinders...). This drives dosfs | |
4543 | - just mad... ;-) */ | |
4544 | - | |
4545 | - case HDIO_GETGEO: | |
4546 | - if (!loc) return -EINVAL; | |
4547 | - err = put_user (2, (char *) &loc->heads); | |
4548 | - if (err) | |
4549 | - return err; | |
4550 | - err = put_user (4, (char *) &loc->sectors); | |
4551 | - if (err) | |
4552 | - return err; | |
4553 | - err = put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders); | |
4554 | - if (err) | |
4555 | - return err; | |
4556 | - err = put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect, | |
4557 | - (long *) &loc->start); | |
4558 | - if (err) | |
4559 | - return err; | |
4560 | - break; | |
4561 | - | |
4562 | - RO_IOCTLS(inode->i_rdev,arg); | |
4563 | + case BLKRASET: | |
4564 | + if (arg > 0xff) { | |
4565 | + err = -EINVAL; | |
4566 | + goto abort; | |
4567 | + } | |
4568 | + read_ahead[MAJOR(dev)] = arg; | |
4569 | + goto done; | |
4570 | ||
4571 | - default: | |
4572 | - return -EINVAL; | |
4573 | - } | |
4574 | + case BLKRAGET: | |
4575 | + if (!arg) { | |
4576 | + err = -EINVAL; | |
4577 | + goto abort; | |
4578 | + } | |
4579 | + err = md_put_user (read_ahead[ | |
4580 | + MAJOR(dev)], (long *) arg); | |
4581 | + goto done; | |
4582 | + default: | |
4583 | + } | |
4584 | + | |
4585 | + /* | |
4586 | + * Commands creating/starting a new array: | |
4587 | + */ | |
4588 | + | |
4589 | + mddev = kdev_to_mddev(dev); | |
4590 | + | |
4591 | + switch (cmd) | |
4592 | + { | |
4593 | + case SET_ARRAY_INFO: | |
4594 | + case START_ARRAY: | |
4595 | + if (mddev) { | |
4596 | + printk("array md%d already exists!\n", | |
4597 | + mdidx(mddev)); | |
4598 | + err = -EEXIST; | |
4599 | + goto abort; | |
4600 | + } | |
4601 | + default: | |
4602 | + } | |
4603 | + | |
4604 | + switch (cmd) | |
4605 | + { | |
4606 | + case SET_ARRAY_INFO: | |
4607 | + mddev = alloc_mddev(dev); | |
4608 | + if (!mddev) { | |
4609 | + err = -ENOMEM; | |
4610 | + goto abort; | |
4611 | + } | |
4612 | + /* | |
4613 | + * alloc_mddev() should possibly self-lock. | |
4614 | + */ | |
4615 | + err = lock_mddev(mddev); | |
4616 | + if (err) { | |
4617 | + printk("ioctl, reason %d, cmd %d\n", err, cmd); | |
4618 | + goto abort; | |
4619 | + } | |
4620 | + err = set_array_info(mddev, (void *)arg); | |
4621 | + if (err) { | |
4622 | + printk("couldnt set array info. %d\n", err); | |
4623 | + goto abort; | |
4624 | + } | |
4625 | + goto done_unlock; | |
4626 | + | |
4627 | + case START_ARRAY: | |
4628 | + /* | |
4629 | + * possibly make it lock the array ... | |
4630 | + */ | |
4631 | + err = autostart_array((kdev_t)arg); | |
4632 | + if (err) { | |
4633 | + printk("autostart %s failed!\n", | |
4634 | + partition_name((kdev_t)arg)); | |
4635 | + goto abort; | |
4636 | + } | |
4637 | + goto done; | |
4638 | + | |
4639 | + default: | |
4640 | + } | |
4641 | + | |
4642 | + /* | |
4643 | + * Commands querying/configuring an existing array: | |
4644 | + */ | |
4645 | + | |
4646 | + if (!mddev) { | |
4647 | + err = -ENODEV; | |
4648 | + goto abort; | |
4649 | + } | |
4650 | + err = lock_mddev(mddev); | |
4651 | + if (err) { | |
4652 | + printk("ioctl lock interrupted, reason %d, cmd %d\n",err, cmd); | |
4653 | + goto abort; | |
4654 | + } | |
4655 | + | |
4656 | + /* | |
4657 | + * Commands even a read-only array can execute: | |
4658 | + */ | |
4659 | + switch (cmd) | |
4660 | + { | |
4661 | + case GET_ARRAY_INFO: | |
4662 | + err = get_array_info(mddev, (void *)arg); | |
4663 | + goto done_unlock; | |
4664 | + | |
4665 | + case GET_DISK_INFO: | |
4666 | + err = get_disk_info(mddev, (void *)arg); | |
4667 | + goto done_unlock; | |
4668 | + | |
4669 | + case RESTART_ARRAY_RW: | |
4670 | + err = restart_array(mddev); | |
4671 | + goto done_unlock; | |
4672 | + | |
4673 | + case STOP_ARRAY: | |
4674 | + err = do_md_stop (mddev, 0); | |
4675 | + goto done_unlock; | |
4676 | + | |
4677 | + case STOP_ARRAY_RO: | |
4678 | + err = do_md_stop (mddev, 1); | |
4679 | + goto done_unlock; | |
4680 | + | |
4681 | + /* | |
4682 | + * We have a problem here : there is no easy way to give a CHS | |
4683 | + * virtual geometry. We currently pretend that we have a 2 heads | |
4684 | + * 4 sectors (with a BIG number of cylinders...). This drives | |
4685 | + * dosfs just mad... ;-) | |
4686 | + */ | |
4687 | + case HDIO_GETGEO: | |
4688 | + if (!loc) { | |
4689 | + err = -EINVAL; | |
4690 | + goto abort_unlock; | |
4691 | + } | |
4692 | + err = md_put_user (2, (char *) &loc->heads); | |
4693 | + if (err) | |
4694 | + goto abort_unlock; | |
4695 | + err = md_put_user (4, (char *) &loc->sectors); | |
4696 | + if (err) | |
4697 | + goto abort_unlock; | |
4698 | + err = md_put_user (md_hd_struct[mdidx(mddev)].nr_sects/8, | |
4699 | + (short *) &loc->cylinders); | |
4700 | + if (err) | |
4701 | + goto abort_unlock; | |
4702 | + err = md_put_user (md_hd_struct[minor].start_sect, | |
4703 | + (long *) &loc->start); | |
4704 | + goto done_unlock; | |
4705 | + } | |
4706 | ||
4707 | - return (0); | |
4708 | + /* | |
4709 | + * The remaining ioctls are changing the state of the | |
4710 | + * superblock, so we do not allow read-only arrays | |
4711 | + * here: | |
4712 | + */ | |
4713 | + if (mddev->ro) { | |
4714 | + err = -EROFS; | |
4715 | + goto abort_unlock; | |
4716 | + } | |
4717 | + | |
4718 | + switch (cmd) | |
4719 | + { | |
4720 | + case CLEAR_ARRAY: | |
4721 | + err = clear_array(mddev); | |
4722 | + goto done_unlock; | |
4723 | + | |
4724 | + case ADD_NEW_DISK: | |
4725 | + err = add_new_disk(mddev, (void *)arg); | |
4726 | + goto done_unlock; | |
4727 | + | |
4728 | + case HOT_REMOVE_DISK: | |
4729 | + err = hot_remove_disk(mddev, (kdev_t)arg); | |
4730 | + goto done_unlock; | |
4731 | + | |
4732 | + case HOT_ADD_DISK: | |
4733 | + err = hot_add_disk(mddev, (kdev_t)arg); | |
4734 | + goto done_unlock; | |
4735 | + | |
4736 | + case SET_DISK_INFO: | |
4737 | + err = set_disk_info(mddev, (void *)arg); | |
4738 | + goto done_unlock; | |
4739 | + | |
4740 | + case WRITE_RAID_INFO: | |
4741 | + err = write_raid_info(mddev); | |
4742 | + goto done_unlock; | |
4743 | + | |
4744 | + case UNPROTECT_ARRAY: | |
4745 | + err = unprotect_array(mddev); | |
4746 | + goto done_unlock; | |
4747 | + | |
4748 | + case PROTECT_ARRAY: | |
4749 | + err = protect_array(mddev); | |
4750 | + goto done_unlock; | |
4751 | + | |
4752 | + case SET_DISK_FAULTY: | |
4753 | + err = set_disk_faulty(mddev, (kdev_t)arg); | |
4754 | + goto done_unlock; | |
4755 | + | |
4756 | + case RUN_ARRAY: | |
4757 | + { | |
4758 | + mdu_param_t param; | |
4759 | + | |
4760 | + err = md_copy_from_user(¶m, (mdu_param_t *)arg, | |
4761 | + sizeof(param)); | |
4762 | + if (err) | |
4763 | + goto abort_unlock; | |
4764 | + | |
4765 | + err = do_md_run (mddev); | |
4766 | + /* | |
4767 | + * we have to clean up the mess if | |
4768 | + * the array cannot be run for some | |
4769 | + * reason ... | |
4770 | + */ | |
4771 | + if (err) { | |
4772 | + mddev->sb_dirty = 0; | |
4773 | + do_md_stop (mddev, 0); | |
4774 | + } | |
4775 | + goto done_unlock; | |
4776 | + } | |
4777 | + | |
4778 | + default: | |
4779 | + printk(KERN_WARNING "%s(pid %d) used obsolete MD ioctl, upgrade your software to use new ictls.\n", current->comm, current->pid); | |
4780 | + err = -EINVAL; | |
4781 | + goto abort_unlock; | |
4782 | + } | |
4783 | + | |
4784 | +done_unlock: | |
4785 | +abort_unlock: | |
4786 | + if (mddev) | |
4787 | + unlock_mddev(mddev); | |
4788 | + else | |
4789 | + printk("huh11?\n"); | |
4790 | + | |
4791 | + return err; | |
4792 | +done: | |
4793 | + if (err) | |
4794 | + printk("huh12?\n"); | |
4795 | +abort: | |
4796 | + return err; | |
4797 | } | |
4798 | ||
4799 | + | |
4800 | +#if LINUX_VERSION_CODE < LinuxVersionCode(2,1,0) | |
4801 | + | |
4802 | static int md_open (struct inode *inode, struct file *file) | |
4803 | { | |
4804 | - int minor=MINOR(inode->i_rdev); | |
4805 | + /* | |
4806 | + * Always succeed | |
4807 | + */ | |
4808 | + return (0); | |
4809 | +} | |
4810 | + | |
4811 | +static void md_release (struct inode *inode, struct file *file) | |
4812 | +{ | |
4813 | + sync_dev(inode->i_rdev); | |
4814 | +} | |
4815 | + | |
4816 | + | |
4817 | +static int md_read (struct inode *inode, struct file *file, | |
4818 | + char *buf, int count) | |
4819 | +{ | |
4820 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
4821 | ||
4822 | - md_dev[minor].busy++; | |
4823 | - return (0); /* Always succeed */ | |
4824 | + if (!mddev || !mddev->pers) | |
4825 | + return -ENXIO; | |
4826 | + | |
4827 | + return block_read (inode, file, buf, count); | |
4828 | } | |
4829 | ||
4830 | +static int md_write (struct inode *inode, struct file *file, | |
4831 | + const char *buf, int count) | |
4832 | +{ | |
4833 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
4834 | + | |
4835 | + if (!mddev || !mddev->pers) | |
4836 | + return -ENXIO; | |
4837 | ||
4838 | -static int md_release (struct inode *inode, struct file *file) | |
4839 | + return block_write (inode, file, buf, count); | |
4840 | +} | |
4841 | + | |
4842 | +static struct file_operations md_fops= | |
4843 | { | |
4844 | - int minor=MINOR(inode->i_rdev); | |
4845 | + NULL, | |
4846 | + md_read, | |
4847 | + md_write, | |
4848 | + NULL, | |
4849 | + NULL, | |
4850 | + md_ioctl, | |
4851 | + NULL, | |
4852 | + md_open, | |
4853 | + md_release, | |
4854 | + block_fsync | |
4855 | +}; | |
4856 | + | |
4857 | +#else | |
4858 | ||
4859 | - sync_dev (inode->i_rdev); | |
4860 | - md_dev[minor].busy--; | |
4861 | - return 0; | |
4862 | +static int md_open (struct inode *inode, struct file *file) | |
4863 | +{ | |
4864 | + /* | |
4865 | + * Always succeed | |
4866 | + */ | |
4867 | + return (0); | |
4868 | } | |
4869 | ||
4870 | +static int md_release (struct inode *inode, struct file *file) | |
4871 | +{ | |
4872 | + sync_dev(inode->i_rdev); | |
4873 | + return 0; | |
4874 | +} | |
4875 | ||
4876 | static ssize_t md_read (struct file *file, char *buf, size_t count, | |
4877 | loff_t *ppos) | |
4878 | { | |
4879 | - int minor=MINOR(file->f_dentry->d_inode->i_rdev); | |
4880 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
4881 | ||
4882 | - if (!md_dev[minor].pers) /* Check if device is being run */ | |
4883 | - return -ENXIO; | |
4884 | + if (!mddev || !mddev->pers) | |
4885 | + return -ENXIO; | |
4886 | ||
4887 | - return block_read(file, buf, count, ppos); | |
4888 | + return block_read(file, buf, count, ppos); | |
4889 | } | |
4890 | ||
4891 | static ssize_t md_write (struct file *file, const char *buf, | |
4892 | size_t count, loff_t *ppos) | |
4893 | { | |
4894 | - int minor=MINOR(file->f_dentry->d_inode->i_rdev); | |
4895 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
4896 | ||
4897 | - if (!md_dev[minor].pers) /* Check if device is being run */ | |
4898 | - return -ENXIO; | |
4899 | + if (!mddev || !mddev->pers) | |
4900 | + return -ENXIO; | |
4901 | ||
4902 | - return block_write(file, buf, count, ppos); | |
4903 | + return block_write(file, buf, count, ppos); | |
4904 | } | |
4905 | ||
4906 | static struct file_operations md_fops= | |
4907 | { | |
4908 | - NULL, | |
4909 | - md_read, | |
4910 | - md_write, | |
4911 | - NULL, | |
4912 | - NULL, | |
4913 | - md_ioctl, | |
4914 | - NULL, | |
4915 | - md_open, | |
4916 | - NULL, | |
4917 | - md_release, | |
4918 | - block_fsync | |
4919 | + NULL, | |
4920 | + md_read, | |
4921 | + md_write, | |
4922 | + NULL, | |
4923 | + NULL, | |
4924 | + md_ioctl, | |
4925 | + NULL, | |
4926 | + md_open, | |
4927 | + NULL, | |
4928 | + md_release, | |
4929 | + block_fsync | |
4930 | }; | |
4931 | ||
4932 | -int md_map (int minor, kdev_t *rdev, unsigned long *rsector, unsigned long size) | |
4933 | +#endif | |
4934 | + | |
4935 | +int md_map (kdev_t dev, kdev_t *rdev, | |
4936 | + unsigned long *rsector, unsigned long size) | |
4937 | { | |
4938 | - if ((unsigned int) minor >= MAX_MD_DEV) | |
4939 | - { | |
4940 | - printk ("Bad md device %d\n", minor); | |
4941 | - return (-1); | |
4942 | - } | |
4943 | - | |
4944 | - if (!md_dev[minor].pers) | |
4945 | - { | |
4946 | - printk ("Oops ! md%d not running, giving up !\n", minor); | |
4947 | - return (-1); | |
4948 | - } | |
4949 | + int err; | |
4950 | + mddev_t *mddev = kdev_to_mddev(dev); | |
4951 | ||
4952 | - return (md_dev[minor].pers->map(md_dev+minor, rdev, rsector, size)); | |
4953 | + if (!mddev || !mddev->pers) { | |
4954 | + err = -ENXIO; | |
4955 | + goto out; | |
4956 | + } | |
4957 | + | |
4958 | + err = mddev->pers->map(mddev, dev, rdev, rsector, size); | |
4959 | +out: | |
4960 | + return err; | |
4961 | } | |
4962 | ||
4963 | -int md_make_request (int minor, int rw, struct buffer_head * bh) | |
4964 | +int md_make_request (struct buffer_head * bh, int rw) | |
4965 | { | |
4966 | - if (md_dev [minor].pers->make_request) { | |
4967 | - if (buffer_locked(bh)) | |
4968 | - return 0; | |
4969 | + int err; | |
4970 | + mddev_t *mddev = kdev_to_mddev(bh->b_dev); | |
4971 | + | |
4972 | + if (!mddev || !mddev->pers) { | |
4973 | + err = -ENXIO; | |
4974 | + goto out; | |
4975 | + } | |
4976 | + | |
4977 | + if (mddev->pers->make_request) { | |
4978 | + if (buffer_locked(bh)) { | |
4979 | + err = 0; | |
4980 | + goto out; | |
4981 | + } | |
4982 | set_bit(BH_Lock, &bh->b_state); | |
4983 | if (rw == WRITE || rw == WRITEA) { | |
4984 | if (!buffer_dirty(bh)) { | |
4985 | - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); | |
4986 | - return 0; | |
4987 | + bh->b_end_io(bh, buffer_uptodate(bh)); | |
4988 | + err = 0; | |
4989 | + goto out; | |
4990 | } | |
4991 | } | |
4992 | if (rw == READ || rw == READA) { | |
4993 | if (buffer_uptodate(bh)) { | |
4994 | - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); | |
4995 | - return 0; | |
4996 | + bh->b_end_io(bh, buffer_uptodate(bh)); | |
4997 | + err = 0; | |
4998 | + goto out; | |
4999 | } | |
5000 | } | |
5001 | - return (md_dev[minor].pers->make_request(md_dev+minor, rw, bh)); | |
5002 | + err = mddev->pers->make_request(mddev, rw, bh); | |
5003 | } else { | |
5004 | make_request (MAJOR(bh->b_rdev), rw, bh); | |
5005 | - return 0; | |
5006 | + err = 0; | |
5007 | } | |
5008 | +out: | |
5009 | + return err; | |
5010 | } | |
5011 | ||
5012 | static void do_md_request (void) | |
5013 | { | |
5014 | - printk ("Got md request, not good..."); | |
5015 | - return; | |
5016 | + printk(KERN_ALERT "Got md request, not good..."); | |
5017 | + return; | |
5018 | +} | |
5019 | + | |
5020 | +int md_thread(void * arg) | |
5021 | +{ | |
5022 | + mdk_thread_t *thread = arg; | |
5023 | + | |
5024 | + md_lock_kernel(); | |
5025 | + exit_mm(current); | |
5026 | + exit_files(current); | |
5027 | + exit_fs(current); | |
5028 | + | |
5029 | + /* | |
5030 | + * Detach thread | |
5031 | + */ | |
5032 | + sys_setsid(); | |
5033 | + sprintf(current->comm, thread->name); | |
5034 | + md_init_signals(); | |
5035 | + md_flush_signals(); | |
5036 | + thread->tsk = current; | |
5037 | + | |
5038 | + /* | |
5039 | + * md_thread is a 'system-thread', it's priority should be very | |
5040 | + * high. We avoid resource deadlocks individually in each | |
5041 | + * raid personality. (RAID5 does preallocation) We also use RR and | |
5042 | + * the very same RT priority as kswapd, thus we will never get | |
5043 | + * into a priority inversion deadlock. | |
5044 | + * | |
5045 | + * we definitely have to have equal or higher priority than | |
5046 | + * bdflush, otherwise bdflush will deadlock if there are too | |
5047 | + * many dirty RAID5 blocks. | |
5048 | + */ | |
5049 | + current->policy = SCHED_OTHER; | |
5050 | + current->priority = 40; | |
5051 | + | |
5052 | + up(thread->sem); | |
5053 | + | |
5054 | + for (;;) { | |
5055 | + cli(); | |
5056 | + if (!test_bit(THREAD_WAKEUP, &thread->flags)) { | |
5057 | + if (!thread->run) | |
5058 | + break; | |
5059 | + interruptible_sleep_on(&thread->wqueue); | |
5060 | + } | |
5061 | + sti(); | |
5062 | + clear_bit(THREAD_WAKEUP, &thread->flags); | |
5063 | + if (thread->run) { | |
5064 | + thread->run(thread->data); | |
5065 | + run_task_queue(&tq_disk); | |
5066 | + } | |
5067 | + if (md_signal_pending(current)) { | |
5068 | + printk("%8s(%d) flushing signals.\n", current->comm, | |
5069 | + current->pid); | |
5070 | + md_flush_signals(); | |
5071 | + } | |
5072 | + } | |
5073 | + sti(); | |
5074 | + up(thread->sem); | |
5075 | + return 0; | |
5076 | } | |
5077 | ||
5078 | -void md_wakeup_thread(struct md_thread *thread) | |
5079 | +void md_wakeup_thread(mdk_thread_t *thread) | |
5080 | { | |
5081 | set_bit(THREAD_WAKEUP, &thread->flags); | |
5082 | wake_up(&thread->wqueue); | |
5083 | } | |
5084 | ||
5085 | -struct md_thread *md_register_thread (void (*run) (void *), void *data) | |
5086 | +mdk_thread_t *md_register_thread (void (*run) (void *), | |
5087 | + void *data, const char *name) | |
5088 | { | |
5089 | - struct md_thread *thread = (struct md_thread *) | |
5090 | - kmalloc(sizeof(struct md_thread), GFP_KERNEL); | |
5091 | + mdk_thread_t *thread; | |
5092 | int ret; | |
5093 | struct semaphore sem = MUTEX_LOCKED; | |
5094 | ||
5095 | - if (!thread) return NULL; | |
5096 | + thread = (mdk_thread_t *) kmalloc | |
5097 | + (sizeof(mdk_thread_t), GFP_KERNEL); | |
5098 | + if (!thread) | |
5099 | + return NULL; | |
5100 | ||
5101 | - memset(thread, 0, sizeof(struct md_thread)); | |
5102 | + memset(thread, 0, sizeof(mdk_thread_t)); | |
5103 | init_waitqueue(&thread->wqueue); | |
5104 | ||
5105 | thread->sem = &sem; | |
5106 | thread->run = run; | |
5107 | thread->data = data; | |
5108 | + thread->name = name; | |
5109 | ret = kernel_thread(md_thread, thread, 0); | |
5110 | if (ret < 0) { | |
5111 | kfree(thread); | |
5112 | @@ -838,270 +3032,407 @@ | |
5113 | return thread; | |
5114 | } | |
5115 | ||
5116 | -void md_unregister_thread (struct md_thread *thread) | |
5117 | +void md_interrupt_thread (mdk_thread_t *thread) | |
5118 | +{ | |
5119 | + if (!thread->tsk) { | |
5120 | + MD_BUG(); | |
5121 | + return; | |
5122 | + } | |
5123 | + printk("interrupting MD-thread pid %d\n", thread->tsk->pid); | |
5124 | + send_sig(SIGKILL, thread->tsk, 1); | |
5125 | +} | |
5126 | + | |
5127 | +void md_unregister_thread (mdk_thread_t *thread) | |
5128 | { | |
5129 | struct semaphore sem = MUTEX_LOCKED; | |
5130 | ||
5131 | thread->sem = &sem; | |
5132 | thread->run = NULL; | |
5133 | - if (thread->tsk) | |
5134 | - printk("Killing md_thread %d %p %s\n", | |
5135 | - thread->tsk->pid, thread->tsk, thread->tsk->comm); | |
5136 | - else | |
5137 | - printk("Aiee. md_thread has 0 tsk\n"); | |
5138 | - send_sig(SIGKILL, thread->tsk, 1); | |
5139 | - printk("downing on %p\n", &sem); | |
5140 | + thread->name = NULL; | |
5141 | + if (!thread->tsk) { | |
5142 | + MD_BUG(); | |
5143 | + return; | |
5144 | + } | |
5145 | + md_interrupt_thread(thread); | |
5146 | down(&sem); | |
5147 | } | |
5148 | ||
5149 | -#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM)) | |
5150 | - | |
5151 | -int md_thread(void * arg) | |
5152 | +void md_recover_arrays (void) | |
5153 | { | |
5154 | - struct md_thread *thread = arg; | |
5155 | - | |
5156 | - lock_kernel(); | |
5157 | - exit_mm(current); | |
5158 | - exit_files(current); | |
5159 | - exit_fs(current); | |
5160 | - | |
5161 | - current->session = 1; | |
5162 | - current->pgrp = 1; | |
5163 | - sprintf(current->comm, "md_thread"); | |
5164 | - siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); | |
5165 | - thread->tsk = current; | |
5166 | - up(thread->sem); | |
5167 | - | |
5168 | - for (;;) { | |
5169 | - cli(); | |
5170 | - if (!test_bit(THREAD_WAKEUP, &thread->flags)) { | |
5171 | - do { | |
5172 | - spin_lock(¤t->sigmask_lock); | |
5173 | - flush_signals(current); | |
5174 | - spin_unlock(¤t->sigmask_lock); | |
5175 | - interruptible_sleep_on(&thread->wqueue); | |
5176 | - cli(); | |
5177 | - if (test_bit(THREAD_WAKEUP, &thread->flags)) | |
5178 | - break; | |
5179 | - if (!thread->run) { | |
5180 | - sti(); | |
5181 | - up(thread->sem); | |
5182 | - return 0; | |
5183 | - } | |
5184 | - } while (signal_pending(current)); | |
5185 | - } | |
5186 | - sti(); | |
5187 | - clear_bit(THREAD_WAKEUP, &thread->flags); | |
5188 | - if (thread->run) { | |
5189 | - thread->run(thread->data); | |
5190 | - run_task_queue(&tq_disk); | |
5191 | - } | |
5192 | + if (!md_recovery_thread) { | |
5193 | + MD_BUG(); | |
5194 | + return; | |
5195 | } | |
5196 | + md_wakeup_thread(md_recovery_thread); | |
5197 | } | |
5198 | ||
5199 | -EXPORT_SYMBOL(md_size); | |
5200 | -EXPORT_SYMBOL(md_maxreadahead); | |
5201 | -EXPORT_SYMBOL(register_md_personality); | |
5202 | -EXPORT_SYMBOL(unregister_md_personality); | |
5203 | -EXPORT_SYMBOL(partition_name); | |
5204 | -EXPORT_SYMBOL(md_dev); | |
5205 | -EXPORT_SYMBOL(md_error); | |
5206 | -EXPORT_SYMBOL(md_register_thread); | |
5207 | -EXPORT_SYMBOL(md_unregister_thread); | |
5208 | -EXPORT_SYMBOL(md_update_sb); | |
5209 | -EXPORT_SYMBOL(md_map); | |
5210 | -EXPORT_SYMBOL(md_wakeup_thread); | |
5211 | -EXPORT_SYMBOL(md_do_sync); | |
5212 | ||
5213 | -#ifdef CONFIG_PROC_FS | |
5214 | -static struct proc_dir_entry proc_md = { | |
5215 | - PROC_MD, 6, "mdstat", | |
5216 | - S_IFREG | S_IRUGO, 1, 0, 0, | |
5217 | - 0, &proc_array_inode_operations, | |
5218 | -}; | |
5219 | +int md_error (kdev_t dev, kdev_t rdev) | |
5220 | +{ | |
5221 | + mddev_t *mddev = kdev_to_mddev(dev); | |
5222 | + mdk_rdev_t * rrdev; | |
5223 | + int rc; | |
5224 | + | |
5225 | + if (!mddev) { | |
5226 | + MD_BUG(); | |
5227 | + return 0; | |
5228 | + } | |
5229 | + rrdev = find_rdev(mddev, rdev); | |
5230 | + mark_rdev_faulty(rrdev); | |
5231 | + /* | |
5232 | + * if recovery was running, stop it now. | |
5233 | + */ | |
5234 | + if (mddev->pers->stop_resync) | |
5235 | + mddev->pers->stop_resync(mddev); | |
5236 | + if (mddev->recovery_running) | |
5237 | + md_interrupt_thread(md_recovery_thread); | |
5238 | + if (mddev->pers->error_handler) { | |
5239 | + rc = mddev->pers->error_handler(mddev, rdev); | |
5240 | + md_recover_arrays(); | |
5241 | + return rc; | |
5242 | + } | |
5243 | +#if 0 | |
5244 | + /* | |
5245 | + * Drop all buffers in the failed array. | |
5246 | + * _not_. This is called from IRQ handlers ... | |
5247 | + */ | |
5248 | + invalidate_buffers(rdev); | |
5249 | #endif | |
5250 | + return 0; | |
5251 | +} | |
5252 | ||
5253 | -static void md_geninit (struct gendisk *gdisk) | |
5254 | +static int status_unused (char * page) | |
5255 | { | |
5256 | - int i; | |
5257 | - | |
5258 | - for(i=0;i<MAX_MD_DEV;i++) | |
5259 | - { | |
5260 | - md_blocksizes[i] = 1024; | |
5261 | - md_maxreadahead[i] = MD_DEFAULT_DISK_READAHEAD; | |
5262 | - md_gendisk.part[i].start_sect=-1; /* avoid partition check */ | |
5263 | - md_gendisk.part[i].nr_sects=0; | |
5264 | - md_dev[i].pers=NULL; | |
5265 | - } | |
5266 | + int sz = 0, i = 0; | |
5267 | + mdk_rdev_t *rdev; | |
5268 | + struct md_list_head *tmp; | |
5269 | ||
5270 | - blksize_size[MD_MAJOR] = md_blocksizes; | |
5271 | - max_readahead[MD_MAJOR] = md_maxreadahead; | |
5272 | + sz += sprintf(page + sz, "unused devices: "); | |
5273 | ||
5274 | -#ifdef CONFIG_PROC_FS | |
5275 | - proc_register(&proc_root, &proc_md); | |
5276 | -#endif | |
5277 | + ITERATE_RDEV_ALL(rdev,tmp) { | |
5278 | + if (!rdev->same_set.next && !rdev->same_set.prev) { | |
5279 | + /* | |
5280 | + * The device is not yet used by any array. | |
5281 | + */ | |
5282 | + i++; | |
5283 | + sz += sprintf(page + sz, "%s ", | |
5284 | + partition_name(rdev->dev)); | |
5285 | + } | |
5286 | + } | |
5287 | + if (!i) | |
5288 | + sz += sprintf(page + sz, "<none>"); | |
5289 | + | |
5290 | + sz += sprintf(page + sz, "\n"); | |
5291 | + return sz; | |
5292 | } | |
5293 | ||
5294 | -int md_error (kdev_t mddev, kdev_t rdev) | |
5295 | + | |
5296 | +static int status_resync (char * page, mddev_t * mddev) | |
5297 | { | |
5298 | - unsigned int minor = MINOR (mddev); | |
5299 | - int rc; | |
5300 | + int sz = 0; | |
5301 | + unsigned int blocksize, max_blocks, resync, res, dt, tt, et; | |
5302 | ||
5303 | - if (MAJOR(mddev) != MD_MAJOR || minor > MAX_MD_DEV) | |
5304 | - panic ("md_error gets unknown device\n"); | |
5305 | - if (!md_dev [minor].pers) | |
5306 | - panic ("md_error gets an error for an unknown device\n"); | |
5307 | - if (md_dev [minor].pers->error_handler) { | |
5308 | - rc = md_dev [minor].pers->error_handler (md_dev+minor, rdev); | |
5309 | -#if SUPPORT_RECONSTRUCTION | |
5310 | - md_wakeup_thread(md_sync_thread); | |
5311 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
5312 | - return rc; | |
5313 | - } | |
5314 | - return 0; | |
5315 | + resync = mddev->curr_resync; | |
5316 | + blocksize = blksize_size[MD_MAJOR][mdidx(mddev)]; | |
5317 | + max_blocks = blk_size[MD_MAJOR][mdidx(mddev)] / (blocksize >> 10); | |
5318 | + | |
5319 | + /* | |
5320 | + * Should not happen. | |
5321 | + */ | |
5322 | + if (!max_blocks) { | |
5323 | + MD_BUG(); | |
5324 | + return 0; | |
5325 | + } | |
5326 | + res = resync*100/max_blocks; | |
5327 | + if (!mddev->recovery_running) | |
5328 | + /* | |
5329 | + * true resync | |
5330 | + */ | |
5331 | + sz += sprintf(page + sz, " resync=%u%%", res); | |
5332 | + else | |
5333 | + /* | |
5334 | + * recovery ... | |
5335 | + */ | |
5336 | + sz += sprintf(page + sz, " recovery=%u%%", res); | |
5337 | + | |
5338 | + /* | |
5339 | + * We do not want to overflow, so the order of operands and | |
5340 | + * the * 100 / 100 trick are important. We do a +1 to be | |
5341 | + * safe against division by zero. We only estimate anyway. | |
5342 | + * | |
5343 | + * dt: time until now | |
5344 | + * tt: total time | |
5345 | + * et: estimated finish time | |
5346 | + */ | |
5347 | + dt = ((jiffies - mddev->resync_start) / HZ); | |
5348 | + tt = (dt * (max_blocks / (resync/100+1)))/100; | |
5349 | + if (tt > dt) | |
5350 | + et = tt - dt; | |
5351 | + else | |
5352 | + /* | |
5353 | + * ignore rounding effects near finish time | |
5354 | + */ | |
5355 | + et = 0; | |
5356 | + | |
5357 | + sz += sprintf(page + sz, " finish=%u.%umin", et / 60, (et % 60)/6); | |
5358 | + | |
5359 | + return sz; | |
5360 | } | |
5361 | ||
5362 | int get_md_status (char *page) | |
5363 | { | |
5364 | - int sz=0, i, j, size; | |
5365 | - | |
5366 | - sz+=sprintf( page+sz, "Personalities : "); | |
5367 | - for (i=0; i<MAX_PERSONALITY; i++) | |
5368 | - if (pers[i]) | |
5369 | - sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name); | |
5370 | - | |
5371 | - page[sz-1]='\n'; | |
5372 | - | |
5373 | - sz+=sprintf (page+sz, "read_ahead "); | |
5374 | - if (read_ahead[MD_MAJOR]==INT_MAX) | |
5375 | - sz+=sprintf (page+sz, "not set\n"); | |
5376 | - else | |
5377 | - sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]); | |
5378 | + int sz = 0, j, size; | |
5379 | + struct md_list_head *tmp, *tmp2; | |
5380 | + mdk_rdev_t *rdev; | |
5381 | + mddev_t *mddev; | |
5382 | + | |
5383 | + sz += sprintf(page + sz, "Personalities : "); | |
5384 | + for (j = 0; j < MAX_PERSONALITY; j++) | |
5385 | + if (pers[j]) | |
5386 | + sz += sprintf(page+sz, "[%s] ", pers[j]->name); | |
5387 | + | |
5388 | + sz += sprintf(page+sz, "\n"); | |
5389 | + | |
5390 | + | |
5391 | + sz += sprintf(page+sz, "read_ahead "); | |
5392 | + if (read_ahead[MD_MAJOR] == INT_MAX) | |
5393 | + sz += sprintf(page+sz, "not set\n"); | |
5394 | + else | |
5395 | + sz += sprintf(page+sz, "%d sectors\n", read_ahead[MD_MAJOR]); | |
5396 | ||
5397 | - for (i=0; i<MAX_MD_DEV; i++) | |
5398 | - { | |
5399 | - sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in"); | |
5400 | - | |
5401 | - if (md_dev[i].pers) | |
5402 | - sz+=sprintf (page+sz, " %s", md_dev[i].pers->name); | |
5403 | + ITERATE_MDDEV(mddev,tmp) { | |
5404 | + sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev), | |
5405 | + mddev->pers ? "" : "in"); | |
5406 | + if (mddev->pers) { | |
5407 | + if (mddev->ro) | |
5408 | + sz += sprintf(page + sz, " (read-only)"); | |
5409 | + sz += sprintf(page + sz, " %s", mddev->pers->name); | |
5410 | + } | |
5411 | ||
5412 | - size=0; | |
5413 | - for (j=0; j<md_dev[i].nb_dev; j++) | |
5414 | - { | |
5415 | - sz+=sprintf (page+sz, " %s", | |
5416 | - partition_name(md_dev[i].devices[j].dev)); | |
5417 | - size+=md_dev[i].devices[j].size; | |
5418 | - } | |
5419 | + size = 0; | |
5420 | + ITERATE_RDEV(mddev,rdev,tmp2) { | |
5421 | + sz += sprintf(page + sz, " %s[%d]", | |
5422 | + partition_name(rdev->dev), rdev->desc_nr); | |
5423 | + if (rdev->faulty) { | |
5424 | + sz += sprintf(page + sz, "(F)"); | |
5425 | + continue; | |
5426 | + } | |
5427 | + size += rdev->size; | |
5428 | + } | |
5429 | ||
5430 | - if (md_dev[i].nb_dev) { | |
5431 | - if (md_dev[i].pers) | |
5432 | - sz+=sprintf (page+sz, " %d blocks", md_size[i]); | |
5433 | - else | |
5434 | - sz+=sprintf (page+sz, " %d blocks", size); | |
5435 | - } | |
5436 | + if (mddev->nb_dev) { | |
5437 | + if (mddev->pers) | |
5438 | + sz += sprintf(page + sz, " %d blocks", | |
5439 | + md_size[mdidx(mddev)]); | |
5440 | + else | |
5441 | + sz += sprintf(page + sz, " %d blocks", size); | |
5442 | + } | |
5443 | ||
5444 | - if (!md_dev[i].pers) | |
5445 | - { | |
5446 | - sz+=sprintf (page+sz, "\n"); | |
5447 | - continue; | |
5448 | - } | |
5449 | + if (!mddev->pers) { | |
5450 | + sz += sprintf(page+sz, "\n"); | |
5451 | + continue; | |
5452 | + } | |
5453 | ||
5454 | - if (md_dev[i].pers->max_invalid_dev) | |
5455 | - sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i)); | |
5456 | + sz += mddev->pers->status (page+sz, mddev); | |
5457 | ||
5458 | - sz+=md_dev[i].pers->status (page+sz, i, md_dev+i); | |
5459 | - sz+=sprintf (page+sz, "\n"); | |
5460 | - } | |
5461 | + if (mddev->curr_resync) | |
5462 | + sz += status_resync (page+sz, mddev); | |
5463 | + else { | |
5464 | + if (md_atomic_read(&mddev->resync_sem.count) != 1) | |
5465 | + sz += sprintf(page + sz, " resync=DELAYED"); | |
5466 | + } | |
5467 | + sz += sprintf(page + sz, "\n"); | |
5468 | + } | |
5469 | + sz += status_unused (page + sz); | |
5470 | ||
5471 | - return (sz); | |
5472 | + return (sz); | |
5473 | } | |
5474 | ||
5475 | -int register_md_personality (int p_num, struct md_personality *p) | |
5476 | +int register_md_personality (int pnum, mdk_personality_t *p) | |
5477 | { | |
5478 | - int i=(p_num >> PERSONALITY_SHIFT); | |
5479 | - | |
5480 | - if (i >= MAX_PERSONALITY) | |
5481 | - return -EINVAL; | |
5482 | + if (pnum >= MAX_PERSONALITY) | |
5483 | + return -EINVAL; | |
5484 | ||
5485 | - if (pers[i]) | |
5486 | - return -EBUSY; | |
5487 | + if (pers[pnum]) | |
5488 | + return -EBUSY; | |
5489 | ||
5490 | - pers[i]=p; | |
5491 | - printk ("%s personality registered\n", p->name); | |
5492 | - return 0; | |
5493 | + pers[pnum] = p; | |
5494 | + printk(KERN_INFO "%s personality registered\n", p->name); | |
5495 | + return 0; | |
5496 | } | |
5497 | ||
5498 | -int unregister_md_personality (int p_num) | |
5499 | +int unregister_md_personality (int pnum) | |
5500 | { | |
5501 | - int i=(p_num >> PERSONALITY_SHIFT); | |
5502 | - | |
5503 | - if (i >= MAX_PERSONALITY) | |
5504 | - return -EINVAL; | |
5505 | + if (pnum >= MAX_PERSONALITY) | |
5506 | + return -EINVAL; | |
5507 | ||
5508 | - printk ("%s personality unregistered\n", pers[i]->name); | |
5509 | - pers[i]=NULL; | |
5510 | - return 0; | |
5511 | + printk(KERN_INFO "%s personality unregistered\n", pers[pnum]->name); | |
5512 | + pers[pnum] = NULL; | |
5513 | + return 0; | |
5514 | } | |
5515 | ||
5516 | -static md_descriptor_t *get_spare(struct md_dev *mddev) | |
5517 | +static mdp_disk_t *get_spare(mddev_t *mddev) | |
5518 | { | |
5519 | - int i; | |
5520 | - md_superblock_t *sb = mddev->sb; | |
5521 | - md_descriptor_t *descriptor; | |
5522 | - struct real_dev *realdev; | |
5523 | - | |
5524 | - for (i = 0; i < mddev->nb_dev; i++) { | |
5525 | - realdev = &mddev->devices[i]; | |
5526 | - if (!realdev->sb) | |
5527 | + mdp_super_t *sb = mddev->sb; | |
5528 | + mdp_disk_t *disk; | |
5529 | + mdk_rdev_t *rdev; | |
5530 | + struct md_list_head *tmp; | |
5531 | + | |
5532 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5533 | + if (rdev->faulty) | |
5534 | + continue; | |
5535 | + if (!rdev->sb) { | |
5536 | + MD_BUG(); | |
5537 | continue; | |
5538 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
5539 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) | |
5540 | + } | |
5541 | + disk = &sb->disks[rdev->desc_nr]; | |
5542 | + if (disk_faulty(disk)) { | |
5543 | + MD_BUG(); | |
5544 | continue; | |
5545 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) | |
5546 | + } | |
5547 | + if (disk_active(disk)) | |
5548 | continue; | |
5549 | - return descriptor; | |
5550 | + return disk; | |
5551 | } | |
5552 | return NULL; | |
5553 | } | |
5554 | ||
5555 | +static int is_mddev_idle (mddev_t *mddev) | |
5556 | +{ | |
5557 | + mdk_rdev_t * rdev; | |
5558 | + struct md_list_head *tmp; | |
5559 | + int idle; | |
5560 | + unsigned long curr_events; | |
5561 | + | |
5562 | + idle = 1; | |
5563 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5564 | + curr_events = io_events[MAJOR(rdev->dev)]; | |
5565 | + | |
5566 | + if (curr_events != rdev->last_events) { | |
5567 | +// printk("!I(%d)", curr_events-rdev->last_events); | |
5568 | + rdev->last_events = curr_events; | |
5569 | + idle = 0; | |
5570 | + } | |
5571 | + } | |
5572 | + return idle; | |
5573 | +} | |
5574 | + | |
5575 | /* | |
5576 | * parallel resyncing thread. | |
5577 | - * | |
5578 | - * FIXME: - make it abort with a dirty array on mdstop, now it just blocks | |
5579 | - * - fix read error handing | |
5580 | */ | |
5581 | ||
5582 | -int md_do_sync(struct md_dev *mddev) | |
5583 | +/* | |
5584 | + * Determine correct block size for this device. | |
5585 | + */ | |
5586 | +unsigned int device_bsize (kdev_t dev) | |
5587 | +{ | |
5588 | + unsigned int i, correct_size; | |
5589 | + | |
5590 | + correct_size = BLOCK_SIZE; | |
5591 | + if (blksize_size[MAJOR(dev)]) { | |
5592 | + i = blksize_size[MAJOR(dev)][MINOR(dev)]; | |
5593 | + if (i) | |
5594 | + correct_size = i; | |
5595 | + } | |
5596 | + | |
5597 | + return correct_size; | |
5598 | +} | |
5599 | + | |
5600 | +static struct wait_queue *resync_wait = (struct wait_queue *)NULL; | |
5601 | + | |
5602 | +#define RA_ORDER (1) | |
5603 | +#define RA_PAGE_SIZE (PAGE_SIZE*(1<<RA_ORDER)) | |
5604 | +#define MAX_NR_BLOCKS (RA_PAGE_SIZE/sizeof(struct buffer_head *)) | |
5605 | + | |
5606 | +int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) | |
5607 | { | |
5608 | - struct buffer_head *bh; | |
5609 | - int max_blocks, blocksize, curr_bsize, percent=1, j; | |
5610 | - kdev_t read_disk = MKDEV(MD_MAJOR, mddev - md_dev); | |
5611 | + mddev_t *mddev2; | |
5612 | + struct buffer_head **bh; | |
5613 | + unsigned int max_blocks, blocksize, curr_bsize, | |
5614 | + i, ii, j, k, chunk, window, nr_blocks, err, serialize; | |
5615 | + kdev_t read_disk = mddev_to_kdev(mddev); | |
5616 | int major = MAJOR(read_disk), minor = MINOR(read_disk); | |
5617 | unsigned long starttime; | |
5618 | + int max_read_errors = 2*MAX_NR_BLOCKS, | |
5619 | + max_write_errors = 2*MAX_NR_BLOCKS; | |
5620 | + struct md_list_head *tmp; | |
5621 | + | |
5622 | +retry_alloc: | |
5623 | + bh = (struct buffer_head **) md__get_free_pages(GFP_KERNEL, RA_ORDER); | |
5624 | + if (!bh) { | |
5625 | + printk(KERN_ERR | |
5626 | + "could not alloc bh array for reconstruction ... retrying!\n"); | |
5627 | + goto retry_alloc; | |
5628 | + } | |
5629 | + | |
5630 | + err = down_interruptible(&mddev->resync_sem); | |
5631 | + if (err) | |
5632 | + goto out_nolock; | |
5633 | + | |
5634 | +recheck: | |
5635 | + serialize = 0; | |
5636 | + ITERATE_MDDEV(mddev2,tmp) { | |
5637 | + if (mddev2 == mddev) | |
5638 | + continue; | |
5639 | + if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) { | |
5640 | + printk(KERN_INFO "md: serializing resync, md%d has overlapping physical units with md%d!\n", mdidx(mddev), mdidx(mddev2)); | |
5641 | + serialize = 1; | |
5642 | + break; | |
5643 | + } | |
5644 | + } | |
5645 | + if (serialize) { | |
5646 | + interruptible_sleep_on(&resync_wait); | |
5647 | + if (md_signal_pending(current)) { | |
5648 | + md_flush_signals(); | |
5649 | + err = -EINTR; | |
5650 | + goto out; | |
5651 | + } | |
5652 | + goto recheck; | |
5653 | + } | |
5654 | + | |
5655 | + mddev->curr_resync = 1; | |
5656 | ||
5657 | - blocksize = blksize_size[major][minor]; | |
5658 | + blocksize = device_bsize(read_disk); | |
5659 | max_blocks = blk_size[major][minor] / (blocksize >> 10); | |
5660 | ||
5661 | - printk("... resync log\n"); | |
5662 | - printk(" .... mddev->nb_dev: %d\n", mddev->nb_dev); | |
5663 | - printk(" .... raid array: %s\n", kdevname(read_disk)); | |
5664 | - printk(" .... max_blocks: %d blocksize: %d\n", max_blocks, blocksize); | |
5665 | - printk("md: syncing RAID array %s\n", kdevname(read_disk)); | |
5666 | + printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev)); | |
5667 | + printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed: %d KB/sec.\n", | |
5668 | + sysctl_speed_limit); | |
5669 | + printk(KERN_INFO "md: using maximum available idle IO bandwith for reconstruction.\n"); | |
5670 | + | |
5671 | + /* | |
5672 | + * Resync has low priority. | |
5673 | + */ | |
5674 | + current->priority = 1; | |
5675 | + | |
5676 | + is_mddev_idle(mddev); /* this also initializes IO event counters */ | |
5677 | + starttime = jiffies; | |
5678 | + mddev->resync_start = starttime; | |
5679 | ||
5680 | - mddev->busy++; | |
5681 | + /* | |
5682 | + * Tune reconstruction: | |
5683 | + */ | |
5684 | + window = md_maxreadahead[mdidx(mddev)]/1024; | |
5685 | + nr_blocks = window / (blocksize >> 10); | |
5686 | + if (!nr_blocks || (nr_blocks > MAX_NR_BLOCKS)) | |
5687 | + nr_blocks = MAX_NR_BLOCKS; | |
5688 | + printk(KERN_INFO "md: using %dk window.\n",window); | |
5689 | ||
5690 | - starttime=jiffies; | |
5691 | - for (j = 0; j < max_blocks; j++) { | |
5692 | + for (j = 0; j < max_blocks; j += nr_blocks) { | |
5693 | ||
5694 | + if (j) | |
5695 | + mddev->curr_resync = j; | |
5696 | /* | |
5697 | * B careful. When some1 mounts a non-'blocksize' filesystem | |
5698 | * then we get the blocksize changed right under us. Go deal | |
5699 | * with it transparently, recalculate 'blocksize', 'j' and | |
5700 | * 'max_blocks': | |
5701 | */ | |
5702 | - curr_bsize = blksize_size[major][minor]; | |
5703 | + curr_bsize = device_bsize(read_disk); | |
5704 | if (curr_bsize != blocksize) { | |
5705 | - diff_blocksize: | |
5706 | + printk(KERN_INFO "md%d: blocksize changed\n", | |
5707 | + mdidx(mddev)); | |
5708 | +retry_read: | |
5709 | if (curr_bsize > blocksize) | |
5710 | /* | |
5711 | * this is safe, rounds downwards. | |
5712 | @@ -1111,114 +3442,384 @@ | |
5713 | j *= blocksize/curr_bsize; | |
5714 | ||
5715 | blocksize = curr_bsize; | |
5716 | + nr_blocks = window / (blocksize >> 10); | |
5717 | + if (!nr_blocks || (nr_blocks > MAX_NR_BLOCKS)) | |
5718 | + nr_blocks = MAX_NR_BLOCKS; | |
5719 | max_blocks = blk_size[major][minor] / (blocksize >> 10); | |
5720 | - } | |
5721 | - if ((bh = breada (read_disk, j, blocksize, j * blocksize, | |
5722 | - max_blocks * blocksize)) != NULL) { | |
5723 | - mark_buffer_dirty(bh, 1); | |
5724 | - brelse(bh); | |
5725 | - } else { | |
5726 | + printk("nr_blocks changed to %d (blocksize %d, j %d, max_blocks %d)\n", | |
5727 | + nr_blocks, blocksize, j, max_blocks); | |
5728 | /* | |
5729 | - * FIXME: Ugly, but set_blocksize() isnt safe ... | |
5730 | + * We will retry the current block-group | |
5731 | */ | |
5732 | - curr_bsize = blksize_size[major][minor]; | |
5733 | - if (curr_bsize != blocksize) | |
5734 | - goto diff_blocksize; | |
5735 | + } | |
5736 | ||
5737 | - /* | |
5738 | - * It's a real read problem. FIXME, handle this | |
5739 | - * a better way. | |
5740 | - */ | |
5741 | - printk ( KERN_ALERT | |
5742 | - "read error, stopping reconstruction.\n"); | |
5743 | - mddev->busy--; | |
5744 | - return 1; | |
5745 | + /* | |
5746 | + * Cleanup routines expect this | |
5747 | + */ | |
5748 | + for (k = 0; k < nr_blocks; k++) | |
5749 | + bh[k] = NULL; | |
5750 | + | |
5751 | + chunk = nr_blocks; | |
5752 | + if (chunk > max_blocks-j) | |
5753 | + chunk = max_blocks-j; | |
5754 | + | |
5755 | + /* | |
5756 | + * request buffer heads ... | |
5757 | + */ | |
5758 | + for (i = 0; i < chunk; i++) { | |
5759 | + bh[i] = getblk (read_disk, j+i, blocksize); | |
5760 | + if (!bh[i]) | |
5761 | + goto read_error; | |
5762 | + if (!buffer_dirty(bh[i])) | |
5763 | + mark_buffer_lowprio(bh[i]); | |
5764 | } | |
5765 | ||
5766 | /* | |
5767 | - * Let's sleep some if we are faster than our speed limit: | |
5768 | + * read buffer heads ... | |
5769 | */ | |
5770 | - while (blocksize*j/(jiffies-starttime+1)*HZ/1024 > SPEED_LIMIT) | |
5771 | - { | |
5772 | - current->state = TASK_INTERRUPTIBLE; | |
5773 | - schedule_timeout(1); | |
5774 | + ll_rw_block (READ, chunk, bh); | |
5775 | + run_task_queue(&tq_disk); | |
5776 | + | |
5777 | + /* | |
5778 | + * verify that all of them are OK ... | |
5779 | + */ | |
5780 | + for (i = 0; i < chunk; i++) { | |
5781 | + ii = chunk-i-1; | |
5782 | + wait_on_buffer(bh[ii]); | |
5783 | + if (!buffer_uptodate(bh[ii])) | |
5784 | + goto read_error; | |
5785 | + } | |
5786 | + | |
5787 | +retry_write: | |
5788 | + for (i = 0; i < chunk; i++) | |
5789 | + mark_buffer_dirty_lowprio(bh[i]); | |
5790 | + | |
5791 | + ll_rw_block(WRITE, chunk, bh); | |
5792 | + run_task_queue(&tq_disk); | |
5793 | + | |
5794 | + for (i = 0; i < chunk; i++) { | |
5795 | + ii = chunk-i-1; | |
5796 | + wait_on_buffer(bh[ii]); | |
5797 | + | |
5798 | + if (spare && disk_faulty(spare)) { | |
5799 | + for (k = 0; k < chunk; k++) | |
5800 | + brelse(bh[k]); | |
5801 | + printk(" <SPARE FAILED!>\n "); | |
5802 | + err = -EIO; | |
5803 | + goto out; | |
5804 | + } | |
5805 | + | |
5806 | + if (!buffer_uptodate(bh[ii])) { | |
5807 | + curr_bsize = device_bsize(read_disk); | |
5808 | + if (curr_bsize != blocksize) { | |
5809 | + printk(KERN_INFO | |
5810 | + "md%d: blocksize changed during write\n", | |
5811 | + mdidx(mddev)); | |
5812 | + for (k = 0; k < chunk; k++) | |
5813 | + if (bh[k]) { | |
5814 | + if (buffer_lowprio(bh[k])) | |
5815 | + mark_buffer_clean(bh[k]); | |
5816 | + brelse(bh[k]); | |
5817 | + } | |
5818 | + goto retry_read; | |
5819 | + } | |
5820 | + printk(" BAD WRITE %8d>\n", j); | |
5821 | + /* | |
5822 | + * Ouch, write error, retry or bail out. | |
5823 | + */ | |
5824 | + if (max_write_errors) { | |
5825 | + max_write_errors--; | |
5826 | + printk ( KERN_WARNING "md%d: write error while reconstructing, at block %u(%d).\n", mdidx(mddev), j, blocksize); | |
5827 | + goto retry_write; | |
5828 | + } | |
5829 | + printk ( KERN_ALERT | |
5830 | + "too many write errors, stopping reconstruction.\n"); | |
5831 | + for (k = 0; k < chunk; k++) | |
5832 | + if (bh[k]) { | |
5833 | + if (buffer_lowprio(bh[k])) | |
5834 | + mark_buffer_clean(bh[k]); | |
5835 | + brelse(bh[k]); | |
5836 | + } | |
5837 | + err = -EIO; | |
5838 | + goto out; | |
5839 | + } | |
5840 | } | |
5841 | ||
5842 | /* | |
5843 | - * FIXME: put this status bar thing into /proc | |
5844 | + * This is the normal 'everything went OK' case | |
5845 | + * do a 'free-behind' logic, we sure dont need | |
5846 | + * this buffer if it was the only user. | |
5847 | */ | |
5848 | - if (!(j%(max_blocks/100))) { | |
5849 | - if (!(percent%10)) | |
5850 | - printk (" %03d%% done.\n",percent); | |
5851 | + for (i = 0; i < chunk; i++) | |
5852 | + if (buffer_dirty(bh[i])) | |
5853 | + brelse(bh[i]); | |
5854 | else | |
5855 | - printk ("."); | |
5856 | - percent++; | |
5857 | + bforget(bh[i]); | |
5858 | + | |
5859 | + | |
5860 | + if (md_signal_pending(current)) { | |
5861 | + /* | |
5862 | + * got a signal, exit. | |
5863 | + */ | |
5864 | + mddev->curr_resync = 0; | |
5865 | + printk("md_do_sync() got signal ... exiting\n"); | |
5866 | + md_flush_signals(); | |
5867 | + err = -EINTR; | |
5868 | + goto out; | |
5869 | } | |
5870 | + | |
5871 | + /* | |
5872 | + * this loop exits only if either when we are slower than | |
5873 | + * the 'hard' speed limit, or the system was IO-idle for | |
5874 | + * a jiffy. | |
5875 | + * the system might be non-idle CPU-wise, but we only care | |
5876 | + * about not overloading the IO subsystem. (things like an | |
5877 | + * e2fsck being done on the RAID array should execute fast) | |
5878 | + */ | |
5879 | +repeat: | |
5880 | + if (md_need_resched(current)) | |
5881 | + schedule(); | |
5882 | + | |
5883 | + if ((blocksize/1024)*j/((jiffies-starttime)/HZ + 1) + 1 | |
5884 | + > sysctl_speed_limit) { | |
5885 | + current->priority = 1; | |
5886 | + | |
5887 | + if (!is_mddev_idle(mddev)) { | |
5888 | + current->state = TASK_INTERRUPTIBLE; | |
5889 | + md_schedule_timeout(HZ/2); | |
5890 | + if (!md_signal_pending(current)) | |
5891 | + goto repeat; | |
5892 | + } | |
5893 | + } else | |
5894 | + current->priority = 40; | |
5895 | } | |
5896 | fsync_dev(read_disk); | |
5897 | - printk("md: %s: sync done.\n", kdevname(read_disk)); | |
5898 | - mddev->busy--; | |
5899 | - return 0; | |
5900 | + printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev)); | |
5901 | + err = 0; | |
5902 | + /* | |
5903 | + * this also signals 'finished resyncing' to md_stop | |
5904 | + */ | |
5905 | +out: | |
5906 | + up(&mddev->resync_sem); | |
5907 | +out_nolock: | |
5908 | + free_pages((unsigned long)bh, RA_ORDER); | |
5909 | + mddev->curr_resync = 0; | |
5910 | + wake_up(&resync_wait); | |
5911 | + return err; | |
5912 | + | |
5913 | +read_error: | |
5914 | + /* | |
5915 | + * set_blocksize() might change the blocksize. This | |
5916 | + * should not happen often, but it happens when eg. | |
5917 | + * someone mounts a filesystem that has non-1k | |
5918 | + * blocksize. set_blocksize() doesnt touch our | |
5919 | + * buffer, but to avoid aliasing problems we change | |
5920 | + * our internal blocksize too and retry the read. | |
5921 | + */ | |
5922 | + curr_bsize = device_bsize(read_disk); | |
5923 | + if (curr_bsize != blocksize) { | |
5924 | + printk(KERN_INFO "md%d: blocksize changed during read\n", | |
5925 | + mdidx(mddev)); | |
5926 | + for (k = 0; k < chunk; k++) | |
5927 | + if (bh[k]) { | |
5928 | + if (buffer_lowprio(bh[k])) | |
5929 | + mark_buffer_clean(bh[k]); | |
5930 | + brelse(bh[k]); | |
5931 | + } | |
5932 | + goto retry_read; | |
5933 | + } | |
5934 | + | |
5935 | + /* | |
5936 | + * It's a real read problem. We retry and bail out | |
5937 | + * only if it's excessive. | |
5938 | + */ | |
5939 | + if (max_read_errors) { | |
5940 | + max_read_errors--; | |
5941 | + printk ( KERN_WARNING "md%d: read error while reconstructing, at block %u(%d).\n", mdidx(mddev), j, blocksize); | |
5942 | + for (k = 0; k < chunk; k++) | |
5943 | + if (bh[k]) { | |
5944 | + if (buffer_lowprio(bh[k])) | |
5945 | + mark_buffer_clean(bh[k]); | |
5946 | + brelse(bh[k]); | |
5947 | + } | |
5948 | + goto retry_read; | |
5949 | + } | |
5950 | + printk ( KERN_ALERT "too many read errors, stopping reconstruction.\n"); | |
5951 | + for (k = 0; k < chunk; k++) | |
5952 | + if (bh[k]) { | |
5953 | + if (buffer_lowprio(bh[k])) | |
5954 | + mark_buffer_clean(bh[k]); | |
5955 | + brelse(bh[k]); | |
5956 | + } | |
5957 | + err = -EIO; | |
5958 | + goto out; | |
5959 | } | |
5960 | ||
5961 | +#undef MAX_NR_BLOCKS | |
5962 | + | |
5963 | /* | |
5964 | - * This is a kernel thread which: syncs a spare disk with the active array | |
5965 | + * This is a kernel thread which syncs a spare disk with the active array | |
5966 | * | |
5967 | * the amount of foolproofing might seem to be a tad excessive, but an | |
5968 | * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs | |
5969 | * of my root partition with the first 0.5 gigs of my /home partition ... so | |
5970 | * i'm a bit nervous ;) | |
5971 | */ | |
5972 | -void mdsyncd (void *data) | |
5973 | +void md_do_recovery (void *data) | |
5974 | { | |
5975 | - int i; | |
5976 | - struct md_dev *mddev; | |
5977 | - md_superblock_t *sb; | |
5978 | - md_descriptor_t *spare; | |
5979 | + int err; | |
5980 | + mddev_t *mddev; | |
5981 | + mdp_super_t *sb; | |
5982 | + mdp_disk_t *spare; | |
5983 | unsigned long flags; | |
5984 | + struct md_list_head *tmp; | |
5985 | ||
5986 | - for (i = 0, mddev = md_dev; i < MAX_MD_DEV; i++, mddev++) { | |
5987 | - if ((sb = mddev->sb) == NULL) | |
5988 | + printk(KERN_INFO "md: recovery thread got woken up ...\n"); | |
5989 | +restart: | |
5990 | + ITERATE_MDDEV(mddev,tmp) { | |
5991 | + sb = mddev->sb; | |
5992 | + if (!sb) | |
5993 | + continue; | |
5994 | + if (mddev->recovery_running) | |
5995 | continue; | |
5996 | if (sb->active_disks == sb->raid_disks) | |
5997 | continue; | |
5998 | - if (!sb->spare_disks) | |
5999 | + if (!sb->spare_disks) { | |
6000 | + printk(KERN_ERR "md%d: no spare disk to reconstruct array! -- continuing in degraded mode\n", mdidx(mddev)); | |
6001 | continue; | |
6002 | + } | |
6003 | + /* | |
6004 | + * now here we get the spare and resync it. | |
6005 | + */ | |
6006 | if ((spare = get_spare(mddev)) == NULL) | |
6007 | continue; | |
6008 | - if (!mddev->pers->mark_spare) | |
6009 | + printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", mdidx(mddev), partition_name(MKDEV(spare->major,spare->minor))); | |
6010 | + if (!mddev->pers->diskop) | |
6011 | continue; | |
6012 | - if (mddev->pers->mark_spare(mddev, spare, SPARE_WRITE)) | |
6013 | + if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) | |
6014 | continue; | |
6015 | - if (md_do_sync(mddev) || (spare->state & (1 << MD_FAULTY_DEVICE))) { | |
6016 | - mddev->pers->mark_spare(mddev, spare, SPARE_INACTIVE); | |
6017 | + down(&mddev->recovery_sem); | |
6018 | + mddev->recovery_running = 1; | |
6019 | + err = md_do_sync(mddev, spare); | |
6020 | + if (err == -EIO) { | |
6021 | + printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n", mdidx(mddev), partition_name(MKDEV(spare->major,spare->minor))); | |
6022 | + if (!disk_faulty(spare)) { | |
6023 | + mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE); | |
6024 | + mark_disk_faulty(spare); | |
6025 | + mark_disk_nonsync(spare); | |
6026 | + mark_disk_inactive(spare); | |
6027 | + sb->spare_disks--; | |
6028 | + sb->working_disks--; | |
6029 | + sb->failed_disks++; | |
6030 | + } | |
6031 | + } else | |
6032 | + if (disk_faulty(spare)) | |
6033 | + mddev->pers->diskop(mddev, &spare, | |
6034 | + DISKOP_SPARE_INACTIVE); | |
6035 | + if (err == -EINTR) { | |
6036 | + /* | |
6037 | + * Recovery got interrupted ... | |
6038 | + * signal back that we have finished using the array. | |
6039 | + */ | |
6040 | + mddev->pers->diskop(mddev, &spare, | |
6041 | + DISKOP_SPARE_INACTIVE); | |
6042 | + up(&mddev->recovery_sem); | |
6043 | + mddev->recovery_running = 0; | |
6044 | continue; | |
6045 | + } else { | |
6046 | + mddev->recovery_running = 0; | |
6047 | + up(&mddev->recovery_sem); | |
6048 | } | |
6049 | save_flags(flags); | |
6050 | cli(); | |
6051 | - mddev->pers->mark_spare(mddev, spare, SPARE_ACTIVE); | |
6052 | - spare->state |= (1 << MD_SYNC_DEVICE); | |
6053 | - spare->state |= (1 << MD_ACTIVE_DEVICE); | |
6054 | - sb->spare_disks--; | |
6055 | - sb->active_disks++; | |
6056 | - mddev->sb_dirty = 1; | |
6057 | - md_update_sb(mddev - md_dev); | |
6058 | + if (!disk_faulty(spare)) { | |
6059 | + /* | |
6060 | + * the SPARE_ACTIVE diskop possibly changes the | |
6061 | + * pointer too | |
6062 | + */ | |
6063 | + mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); | |
6064 | + mark_disk_sync(spare); | |
6065 | + mark_disk_active(spare); | |
6066 | + sb->active_disks++; | |
6067 | + sb->spare_disks--; | |
6068 | + } | |
6069 | restore_flags(flags); | |
6070 | + mddev->sb_dirty = 1; | |
6071 | + md_update_sb(mddev); | |
6072 | + goto restart; | |
6073 | } | |
6074 | + printk(KERN_INFO "md: recovery thread finished ...\n"); | |
6075 | ||
6076 | } | |
6077 | ||
6078 | +int md_notify_reboot(struct notifier_block *this, | |
6079 | + unsigned long code, void *x) | |
6080 | +{ | |
6081 | + struct md_list_head *tmp; | |
6082 | + mddev_t *mddev; | |
6083 | + | |
6084 | + if ((code == MD_SYS_DOWN) || (code == MD_SYS_HALT) | |
6085 | + || (code == MD_SYS_POWER_OFF)) { | |
6086 | + | |
6087 | + printk(KERN_INFO "stopping all md devices.\n"); | |
6088 | + | |
6089 | + ITERATE_MDDEV(mddev,tmp) | |
6090 | + do_md_stop (mddev, 1); | |
6091 | + /* | |
6092 | + * certain more exotic SCSI devices are known to be | |
6093 | + * volatile wrt too early system reboots. While the | |
6094 | + * right place to handle this issue is the given | |
6095 | + * driver, we do want to have a safe RAID driver ... | |
6096 | + */ | |
6097 | + md_mdelay(1000*1); | |
6098 | + } | |
6099 | + return NOTIFY_DONE; | |
6100 | +} | |
6101 | + | |
6102 | +struct notifier_block md_notifier = { | |
6103 | + md_notify_reboot, | |
6104 | + NULL, | |
6105 | + 0 | |
6106 | +}; | |
6107 | + | |
6108 | +md__initfunc(void raid_setup(char *str, int *ints)) | |
6109 | +{ | |
6110 | + char tmpline[100]; | |
6111 | + int len, pos, nr, i; | |
6112 | + | |
6113 | + len = strlen(str) + 1; | |
6114 | + nr = 0; | |
6115 | + pos = 0; | |
6116 | + | |
6117 | + for (i = 0; i < len; i++) { | |
6118 | + char c = str[i]; | |
6119 | + | |
6120 | + if (c == ',' || !c) { | |
6121 | + tmpline[pos] = 0; | |
6122 | + if (!strcmp(tmpline,"noautodetect")) | |
6123 | + raid_setup_args.noautodetect = 1; | |
6124 | + nr++; | |
6125 | + pos = 0; | |
6126 | + continue; | |
6127 | + } | |
6128 | + tmpline[pos] = c; | |
6129 | + pos++; | |
6130 | + } | |
6131 | + raid_setup_args.set = 1; | |
6132 | + return; | |
6133 | +} | |
6134 | + | |
6135 | #ifdef CONFIG_MD_BOOT | |
6136 | struct { | |
6137 | int set; | |
6138 | int ints[100]; | |
6139 | char str[100]; | |
6140 | -} md_setup_args __initdata = { | |
6141 | +} md_setup_args md__initdata = { | |
6142 | 0,{0},{0} | |
6143 | }; | |
6144 | ||
6145 | /* called from init/main.c */ | |
6146 | -__initfunc(void md_setup(char *str,int *ints)) | |
6147 | +md__initfunc(void md_setup(char *str,int *ints)) | |
6148 | { | |
6149 | int i; | |
6150 | for(i=0;i<=ints[0];i++) { | |
6151 | @@ -1230,21 +3831,24 @@ | |
6152 | return; | |
6153 | } | |
6154 | ||
6155 | -__initfunc(void do_md_setup(char *str,int *ints)) | |
6156 | +md__initfunc(void do_md_setup(char *str,int *ints)) | |
6157 | { | |
6158 | - int minor, pers, factor, fault; | |
6159 | +#if 0 | |
6160 | + int minor, pers, chunk_size, fault; | |
6161 | kdev_t dev; | |
6162 | int i=1; | |
6163 | ||
6164 | + printk("i plan to phase this out --mingo\n"); | |
6165 | + | |
6166 | if(ints[0] < 4) { | |
6167 | - printk ("md: Too few Arguments (%d).\n", ints[0]); | |
6168 | + printk (KERN_WARNING "md: Too few Arguments (%d).\n", ints[0]); | |
6169 | return; | |
6170 | } | |
6171 | ||
6172 | minor=ints[i++]; | |
6173 | ||
6174 | - if (minor >= MAX_MD_DEV) { | |
6175 | - printk ("md: Minor device number too high.\n"); | |
6176 | + if ((unsigned int)minor >= MAX_MD_DEVS) { | |
6177 | + printk (KERN_WARNING "md: Minor device number too high.\n"); | |
6178 | return; | |
6179 | } | |
6180 | ||
6181 | @@ -1254,18 +3858,20 @@ | |
6182 | case -1: | |
6183 | #ifdef CONFIG_MD_LINEAR | |
6184 | pers = LINEAR; | |
6185 | - printk ("md: Setting up md%d as linear device.\n",minor); | |
6186 | + printk (KERN_INFO "md: Setting up md%d as linear device.\n", | |
6187 | + minor); | |
6188 | #else | |
6189 | - printk ("md: Linear mode not configured." | |
6190 | + printk (KERN_WARNING "md: Linear mode not configured." | |
6191 | "Recompile the kernel with linear mode enabled!\n"); | |
6192 | #endif | |
6193 | break; | |
6194 | case 0: | |
6195 | pers = STRIPED; | |
6196 | #ifdef CONFIG_MD_STRIPED | |
6197 | - printk ("md: Setting up md%d as a striped device.\n",minor); | |
6198 | + printk (KERN_INFO "md: Setting up md%d as a striped device.\n", | |
6199 | + minor); | |
6200 | #else | |
6201 | - printk ("md: Striped mode not configured." | |
6202 | + printk (KERN_WARNING "md: Striped mode not configured." | |
6203 | "Recompile the kernel with striped mode enabled!\n"); | |
6204 | #endif | |
6205 | break; | |
6206 | @@ -1280,79 +3886,145 @@ | |
6207 | break; | |
6208 | */ | |
6209 | default: | |
6210 | - printk ("md: Unknown or not supported raid level %d.\n", ints[--i]); | |
6211 | + printk (KERN_WARNING "md: Unknown or not supported raid level %d.\n", ints[--i]); | |
6212 | return; | |
6213 | } | |
6214 | ||
6215 | - if(pers) { | |
6216 | + if (pers) { | |
6217 | ||
6218 | - factor=ints[i++]; /* Chunksize */ | |
6219 | - fault =ints[i++]; /* Faultlevel */ | |
6220 | + chunk_size = ints[i++]; /* Chunksize */ | |
6221 | + fault = ints[i++]; /* Faultlevel */ | |
6222 | ||
6223 | - pers=pers | factor | (fault << FAULT_SHIFT); | |
6224 | + pers = pers | chunk_size | (fault << FAULT_SHIFT); | |
6225 | ||
6226 | - while( str && (dev = name_to_kdev_t(str))) { | |
6227 | - do_md_add (minor, dev); | |
6228 | - if((str = strchr (str, ',')) != NULL) | |
6229 | - str++; | |
6230 | - } | |
6231 | + while( str && (dev = name_to_kdev_t(str))) { | |
6232 | + do_md_add (minor, dev); | |
6233 | + if((str = strchr (str, ',')) != NULL) | |
6234 | + str++; | |
6235 | + } | |
6236 | ||
6237 | - do_md_run (minor, pers); | |
6238 | - printk ("md: Loading md%d.\n",minor); | |
6239 | + do_md_run (minor, pers); | |
6240 | + printk (KERN_INFO "md: Loading md%d.\n",minor); | |
6241 | } | |
6242 | - | |
6243 | +#endif | |
6244 | } | |
6245 | #endif | |
6246 | ||
6247 | +void hsm_init (void); | |
6248 | +void translucent_init (void); | |
6249 | void linear_init (void); | |
6250 | void raid0_init (void); | |
6251 | void raid1_init (void); | |
6252 | void raid5_init (void); | |
6253 | ||
6254 | -__initfunc(int md_init (void)) | |
6255 | +md__initfunc(int md_init (void)) | |
6256 | { | |
6257 | - printk ("md driver %d.%d.%d MAX_MD_DEV=%d, MAX_REAL=%d\n", | |
6258 | - MD_MAJOR_VERSION, MD_MINOR_VERSION, MD_PATCHLEVEL_VERSION, | |
6259 | - MAX_MD_DEV, MAX_REAL); | |
6260 | - | |
6261 | - if (register_blkdev (MD_MAJOR, "md", &md_fops)) | |
6262 | - { | |
6263 | - printk ("Unable to get major %d for md\n", MD_MAJOR); | |
6264 | - return (-1); | |
6265 | - } | |
6266 | - | |
6267 | - blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST; | |
6268 | - blk_dev[MD_MAJOR].current_request=NULL; | |
6269 | - read_ahead[MD_MAJOR]=INT_MAX; | |
6270 | - memset(md_dev, 0, MAX_MD_DEV * sizeof (struct md_dev)); | |
6271 | - md_gendisk.next=gendisk_head; | |
6272 | - | |
6273 | - gendisk_head=&md_gendisk; | |
6274 | - | |
6275 | -#if SUPPORT_RECONSTRUCTION | |
6276 | - if ((md_sync_thread = md_register_thread(mdsyncd, NULL)) == NULL) | |
6277 | - printk("md: bug: md_sync_thread == NULL\n"); | |
6278 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
6279 | + static char * name = "mdrecoveryd"; | |
6280 | + | |
6281 | + printk (KERN_INFO "md driver %d.%d.%d MAX_MD_DEVS=%d, MAX_REAL=%d\n", | |
6282 | + MD_MAJOR_VERSION, MD_MINOR_VERSION, | |
6283 | + MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL); | |
6284 | + | |
6285 | + if (register_blkdev (MD_MAJOR, "md", &md_fops)) | |
6286 | + { | |
6287 | + printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR); | |
6288 | + return (-1); | |
6289 | + } | |
6290 | + | |
6291 | + blk_dev[MD_MAJOR].request_fn = DEVICE_REQUEST; | |
6292 | + blk_dev[MD_MAJOR].current_request = NULL; | |
6293 | + read_ahead[MD_MAJOR] = INT_MAX; | |
6294 | + md_gendisk.next = gendisk_head; | |
6295 | + | |
6296 | + gendisk_head = &md_gendisk; | |
6297 | + | |
6298 | + md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); | |
6299 | + if (!md_recovery_thread) | |
6300 | + printk(KERN_ALERT "bug: couldn't allocate md_recovery_thread\n"); | |
6301 | ||
6302 | + md_register_reboot_notifier(&md_notifier); | |
6303 | + md_register_sysctl(); | |
6304 | + | |
6305 | +#ifdef CONFIG_MD_HSM | |
6306 | + hsm_init (); | |
6307 | +#endif | |
6308 | +#ifdef CONFIG_MD_TRANSLUCENT | |
6309 | + translucent_init (); | |
6310 | +#endif | |
6311 | #ifdef CONFIG_MD_LINEAR | |
6312 | - linear_init (); | |
6313 | + linear_init (); | |
6314 | #endif | |
6315 | #ifdef CONFIG_MD_STRIPED | |
6316 | - raid0_init (); | |
6317 | + raid0_init (); | |
6318 | #endif | |
6319 | #ifdef CONFIG_MD_MIRRORING | |
6320 | - raid1_init (); | |
6321 | + raid1_init (); | |
6322 | #endif | |
6323 | #ifdef CONFIG_MD_RAID5 | |
6324 | - raid5_init (); | |
6325 | + raid5_init (); | |
6326 | +#endif | |
6327 | +#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE) | |
6328 | + /* | |
6329 | + * pick a XOR routine, runtime. | |
6330 | + */ | |
6331 | + calibrate_xor_block(); | |
6332 | #endif | |
6333 | - return (0); | |
6334 | + | |
6335 | + return (0); | |
6336 | } | |
6337 | ||
6338 | #ifdef CONFIG_MD_BOOT | |
6339 | -__initfunc(void md_setup_drive(void)) | |
6340 | +md__initfunc(void md_setup_drive(void)) | |
6341 | { | |
6342 | if(md_setup_args.set) | |
6343 | do_md_setup(md_setup_args.str, md_setup_args.ints); | |
6344 | } | |
6345 | #endif | |
6346 | + | |
6347 | +MD_EXPORT_SYMBOL(md_size); | |
6348 | +MD_EXPORT_SYMBOL(register_md_personality); | |
6349 | +MD_EXPORT_SYMBOL(unregister_md_personality); | |
6350 | +MD_EXPORT_SYMBOL(partition_name); | |
6351 | +MD_EXPORT_SYMBOL(md_error); | |
6352 | +MD_EXPORT_SYMBOL(md_recover_arrays); | |
6353 | +MD_EXPORT_SYMBOL(md_register_thread); | |
6354 | +MD_EXPORT_SYMBOL(md_unregister_thread); | |
6355 | +MD_EXPORT_SYMBOL(md_update_sb); | |
6356 | +MD_EXPORT_SYMBOL(md_map); | |
6357 | +MD_EXPORT_SYMBOL(md_wakeup_thread); | |
6358 | +MD_EXPORT_SYMBOL(md_do_sync); | |
6359 | +MD_EXPORT_SYMBOL(md_print_devices); | |
6360 | +MD_EXPORT_SYMBOL(find_rdev_nr); | |
6361 | +MD_EXPORT_SYMBOL(md_check_ordering); | |
6362 | +MD_EXPORT_SYMBOL(md_interrupt_thread); | |
6363 | +MD_EXPORT_SYMBOL(mddev_map); | |
6364 | + | |
6365 | +#ifdef CONFIG_PROC_FS | |
6366 | +static struct proc_dir_entry proc_md = { | |
6367 | + PROC_MD, 6, "mdstat", | |
6368 | + S_IFREG | S_IRUGO, 1, 0, 0, | |
6369 | + 0, &proc_array_inode_operations, | |
6370 | +}; | |
6371 | +#endif | |
6372 | + | |
6373 | +static void md_geninit (struct gendisk *gdisk) | |
6374 | +{ | |
6375 | + int i; | |
6376 | + | |
6377 | + for(i = 0; i < MAX_MD_DEVS; i++) { | |
6378 | + md_blocksizes[i] = 1024; | |
6379 | + md_maxreadahead[i] = MD_READAHEAD; | |
6380 | + md_gendisk.part[i].start_sect = -1; /* avoid partition check */ | |
6381 | + md_gendisk.part[i].nr_sects = 0; | |
6382 | + } | |
6383 | + | |
6384 | + printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); | |
6385 | + | |
6386 | + blksize_size[MD_MAJOR] = md_blocksizes; | |
6387 | + md_set_global_readahead(md_maxreadahead); | |
6388 | + | |
6389 | +#ifdef CONFIG_PROC_FS | |
6390 | + proc_register(&proc_root, &proc_md); | |
6391 | +#endif | |
6392 | +} | |
6393 | + | |
6394 | diff -ruN linux.orig/drivers/block/raid0.c linux-2.2.16/drivers/block/raid0.c | |
6395 | --- linux.orig/drivers/block/raid0.c Tue Jan 4 19:12:14 2000 | |
6396 | +++ linux-2.2.16/drivers/block/raid0.c Fri Jun 9 11:37:45 2000 | |
6397 | @@ -1,4 +1,3 @@ | |
6398 | - | |
6399 | /* | |
6400 | raid0.c : Multiple Devices driver for Linux | |
6401 | Copyright (C) 1994-96 Marc ZYNGIER | |
6402 | @@ -18,146 +17,201 @@ | |
6403 | */ | |
6404 | ||
6405 | #include <linux/module.h> | |
6406 | -#include <linux/md.h> | |
6407 | -#include <linux/raid0.h> | |
6408 | -#include <linux/vmalloc.h> | |
6409 | +#include <linux/raid/raid0.h> | |
6410 | ||
6411 | #define MAJOR_NR MD_MAJOR | |
6412 | #define MD_DRIVER | |
6413 | #define MD_PERSONALITY | |
6414 | ||
6415 | -static int create_strip_zones (int minor, struct md_dev *mddev) | |
6416 | +static int create_strip_zones (mddev_t *mddev) | |
6417 | { | |
6418 | - int i, j, c=0; | |
6419 | - int current_offset=0; | |
6420 | - struct real_dev *smallest_by_zone; | |
6421 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
6422 | - | |
6423 | - data->nr_strip_zones=1; | |
6424 | - | |
6425 | - for (i=1; i<mddev->nb_dev; i++) | |
6426 | - { | |
6427 | - for (j=0; j<i; j++) | |
6428 | - if (mddev->devices[i].size==mddev->devices[j].size) | |
6429 | - { | |
6430 | - c=1; | |
6431 | - break; | |
6432 | - } | |
6433 | - | |
6434 | - if (!c) | |
6435 | - data->nr_strip_zones++; | |
6436 | - | |
6437 | - c=0; | |
6438 | - } | |
6439 | - | |
6440 | - if ((data->strip_zone=vmalloc(sizeof(struct strip_zone)*data->nr_strip_zones)) == NULL) | |
6441 | - return 1; | |
6442 | - | |
6443 | - data->smallest=NULL; | |
6444 | - | |
6445 | - for (i=0; i<data->nr_strip_zones; i++) | |
6446 | - { | |
6447 | - data->strip_zone[i].dev_offset=current_offset; | |
6448 | - smallest_by_zone=NULL; | |
6449 | - c=0; | |
6450 | - | |
6451 | - for (j=0; j<mddev->nb_dev; j++) | |
6452 | - if (mddev->devices[j].size>current_offset) | |
6453 | - { | |
6454 | - data->strip_zone[i].dev[c++]=mddev->devices+j; | |
6455 | - if (!smallest_by_zone || | |
6456 | - smallest_by_zone->size > mddev->devices[j].size) | |
6457 | - smallest_by_zone=mddev->devices+j; | |
6458 | - } | |
6459 | - | |
6460 | - data->strip_zone[i].nb_dev=c; | |
6461 | - data->strip_zone[i].size=(smallest_by_zone->size-current_offset)*c; | |
6462 | - | |
6463 | - if (!data->smallest || | |
6464 | - data->smallest->size > data->strip_zone[i].size) | |
6465 | - data->smallest=data->strip_zone+i; | |
6466 | - | |
6467 | - data->strip_zone[i].zone_offset=i ? (data->strip_zone[i-1].zone_offset+ | |
6468 | - data->strip_zone[i-1].size) : 0; | |
6469 | - current_offset=smallest_by_zone->size; | |
6470 | - } | |
6471 | - return 0; | |
6472 | + int i, c, j, j1, j2; | |
6473 | + int current_offset, curr_zone_offset; | |
6474 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
6475 | + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | |
6476 | + | |
6477 | + /* | |
6478 | + * The number of 'same size groups' | |
6479 | + */ | |
6480 | + conf->nr_strip_zones = 0; | |
6481 | + | |
6482 | + ITERATE_RDEV_ORDERED(mddev,rdev1,j1) { | |
6483 | + printk("raid0: looking at %s\n", partition_name(rdev1->dev)); | |
6484 | + c = 0; | |
6485 | + ITERATE_RDEV_ORDERED(mddev,rdev2,j2) { | |
6486 | + printk("raid0: comparing %s(%d) with %s(%d)\n", partition_name(rdev1->dev), rdev1->size, partition_name(rdev2->dev), rdev2->size); | |
6487 | + if (rdev2 == rdev1) { | |
6488 | + printk("raid0: END\n"); | |
6489 | + break; | |
6490 | + } | |
6491 | + if (rdev2->size == rdev1->size) | |
6492 | + { | |
6493 | + /* | |
6494 | + * Not unique, dont count it as a new | |
6495 | + * group | |
6496 | + */ | |
6497 | + printk("raid0: EQUAL\n"); | |
6498 | + c = 1; | |
6499 | + break; | |
6500 | + } | |
6501 | + printk("raid0: NOT EQUAL\n"); | |
6502 | + } | |
6503 | + if (!c) { | |
6504 | + printk("raid0: ==> UNIQUE\n"); | |
6505 | + conf->nr_strip_zones++; | |
6506 | + printk("raid0: %d zones\n", conf->nr_strip_zones); | |
6507 | + } | |
6508 | + } | |
6509 | + printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); | |
6510 | + | |
6511 | + conf->strip_zone = vmalloc(sizeof(struct strip_zone)* | |
6512 | + conf->nr_strip_zones); | |
6513 | + if (!conf->strip_zone) | |
6514 | + return 1; | |
6515 | + | |
6516 | + | |
6517 | + conf->smallest = NULL; | |
6518 | + current_offset = 0; | |
6519 | + curr_zone_offset = 0; | |
6520 | + | |
6521 | + for (i = 0; i < conf->nr_strip_zones; i++) | |
6522 | + { | |
6523 | + struct strip_zone *zone = conf->strip_zone + i; | |
6524 | + | |
6525 | + printk("zone %d\n", i); | |
6526 | + zone->dev_offset = current_offset; | |
6527 | + smallest = NULL; | |
6528 | + c = 0; | |
6529 | + | |
6530 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
6531 | + | |
6532 | + printk(" checking %s ...", partition_name(rdev->dev)); | |
6533 | + if (rdev->size > current_offset) | |
6534 | + { | |
6535 | + printk(" contained as device %d\n", c); | |
6536 | + zone->dev[c] = rdev; | |
6537 | + c++; | |
6538 | + if (!smallest || (rdev->size <smallest->size)) { | |
6539 | + smallest = rdev; | |
6540 | + printk(" (%d) is smallest!.\n", rdev->size); | |
6541 | + } | |
6542 | + } else | |
6543 | + printk(" nope.\n"); | |
6544 | + } | |
6545 | + | |
6546 | + zone->nb_dev = c; | |
6547 | + zone->size = (smallest->size - current_offset) * c; | |
6548 | + printk(" zone->nb_dev: %d, size: %d\n",zone->nb_dev,zone->size); | |
6549 | + | |
6550 | + if (!conf->smallest || (zone->size < conf->smallest->size)) | |
6551 | + conf->smallest = zone; | |
6552 | + | |
6553 | + zone->zone_offset = curr_zone_offset; | |
6554 | + curr_zone_offset += zone->size; | |
6555 | + | |
6556 | + current_offset = smallest->size; | |
6557 | + printk("current zone offset: %d\n", current_offset); | |
6558 | + } | |
6559 | + printk("done.\n"); | |
6560 | + return 0; | |
6561 | } | |
6562 | ||
6563 | -static int raid0_run (int minor, struct md_dev *mddev) | |
6564 | +static int raid0_run (mddev_t *mddev) | |
6565 | { | |
6566 | - int cur=0, i=0, size, zone0_size, nb_zone; | |
6567 | - struct raid0_data *data; | |
6568 | - | |
6569 | - MOD_INC_USE_COUNT; | |
6570 | + int cur=0, i=0, size, zone0_size, nb_zone; | |
6571 | + raid0_conf_t *conf; | |
6572 | ||
6573 | - if ((mddev->private=vmalloc (sizeof (struct raid0_data))) == NULL) return 1; | |
6574 | - data=(struct raid0_data *) mddev->private; | |
6575 | - | |
6576 | - if (create_strip_zones (minor, mddev)) | |
6577 | - { | |
6578 | - vfree(data); | |
6579 | - return 1; | |
6580 | - } | |
6581 | - | |
6582 | - nb_zone=data->nr_zones= | |
6583 | - md_size[minor]/data->smallest->size + | |
6584 | - (md_size[minor]%data->smallest->size ? 1 : 0); | |
6585 | - | |
6586 | - printk ("raid0 : Allocating %ld bytes for hash.\n",(long)sizeof(struct raid0_hash)*nb_zone); | |
6587 | - if ((data->hash_table=vmalloc (sizeof (struct raid0_hash)*nb_zone)) == NULL) | |
6588 | - { | |
6589 | - vfree(data->strip_zone); | |
6590 | - vfree(data); | |
6591 | - return 1; | |
6592 | - } | |
6593 | - size=data->strip_zone[cur].size; | |
6594 | - | |
6595 | - i=0; | |
6596 | - while (cur<data->nr_strip_zones) | |
6597 | - { | |
6598 | - data->hash_table[i].zone0=data->strip_zone+cur; | |
6599 | - | |
6600 | - if (size>=data->smallest->size)/* If we completely fill the slot */ | |
6601 | - { | |
6602 | - data->hash_table[i++].zone1=NULL; | |
6603 | - size-=data->smallest->size; | |
6604 | - | |
6605 | - if (!size) | |
6606 | - { | |
6607 | - if (++cur==data->nr_strip_zones) continue; | |
6608 | - size=data->strip_zone[cur].size; | |
6609 | - } | |
6610 | - | |
6611 | - continue; | |
6612 | - } | |
6613 | - | |
6614 | - if (++cur==data->nr_strip_zones) /* Last dev, set unit1 as NULL */ | |
6615 | - { | |
6616 | - data->hash_table[i].zone1=NULL; | |
6617 | - continue; | |
6618 | - } | |
6619 | - | |
6620 | - zone0_size=size; /* Here, we use a 2nd dev to fill the slot */ | |
6621 | - size=data->strip_zone[cur].size; | |
6622 | - data->hash_table[i++].zone1=data->strip_zone+cur; | |
6623 | - size-=(data->smallest->size - zone0_size); | |
6624 | - } | |
6625 | + MOD_INC_USE_COUNT; | |
6626 | ||
6627 | - return (0); | |
6628 | + conf = vmalloc(sizeof (raid0_conf_t)); | |
6629 | + if (!conf) | |
6630 | + goto out; | |
6631 | + mddev->private = (void *)conf; | |
6632 | + | |
6633 | + if (md_check_ordering(mddev)) { | |
6634 | + printk("raid0: disks are not ordered, aborting!\n"); | |
6635 | + goto out_free_conf; | |
6636 | + } | |
6637 | + | |
6638 | + if (create_strip_zones (mddev)) | |
6639 | + goto out_free_conf; | |
6640 | + | |
6641 | + printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]); | |
6642 | + printk("raid0 : conf->smallest->size is %d blocks.\n", conf->smallest->size); | |
6643 | + nb_zone = md_size[mdidx(mddev)]/conf->smallest->size + | |
6644 | + (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0); | |
6645 | + printk("raid0 : nb_zone is %d.\n", nb_zone); | |
6646 | + conf->nr_zones = nb_zone; | |
6647 | + | |
6648 | + printk("raid0 : Allocating %d bytes for hash.\n", | |
6649 | + sizeof(struct raid0_hash)*nb_zone); | |
6650 | + | |
6651 | + conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone); | |
6652 | + if (!conf->hash_table) | |
6653 | + goto out_free_zone_conf; | |
6654 | + size = conf->strip_zone[cur].size; | |
6655 | + | |
6656 | + i = 0; | |
6657 | + while (cur < conf->nr_strip_zones) { | |
6658 | + conf->hash_table[i].zone0 = conf->strip_zone + cur; | |
6659 | + | |
6660 | + /* | |
6661 | + * If we completely fill the slot | |
6662 | + */ | |
6663 | + if (size >= conf->smallest->size) { | |
6664 | + conf->hash_table[i++].zone1 = NULL; | |
6665 | + size -= conf->smallest->size; | |
6666 | + | |
6667 | + if (!size) { | |
6668 | + if (++cur == conf->nr_strip_zones) | |
6669 | + continue; | |
6670 | + size = conf->strip_zone[cur].size; | |
6671 | + } | |
6672 | + continue; | |
6673 | + } | |
6674 | + if (++cur == conf->nr_strip_zones) { | |
6675 | + /* | |
6676 | + * Last dev, set unit1 as NULL | |
6677 | + */ | |
6678 | + conf->hash_table[i].zone1=NULL; | |
6679 | + continue; | |
6680 | + } | |
6681 | + | |
6682 | + /* | |
6683 | + * Here we use a 2nd dev to fill the slot | |
6684 | + */ | |
6685 | + zone0_size = size; | |
6686 | + size = conf->strip_zone[cur].size; | |
6687 | + conf->hash_table[i++].zone1 = conf->strip_zone + cur; | |
6688 | + size -= (conf->smallest->size - zone0_size); | |
6689 | + } | |
6690 | + return 0; | |
6691 | + | |
6692 | +out_free_zone_conf: | |
6693 | + vfree(conf->strip_zone); | |
6694 | + conf->strip_zone = NULL; | |
6695 | + | |
6696 | +out_free_conf: | |
6697 | + vfree(conf); | |
6698 | + mddev->private = NULL; | |
6699 | +out: | |
6700 | + MOD_DEC_USE_COUNT; | |
6701 | + return 1; | |
6702 | } | |
6703 | ||
6704 | - | |
6705 | -static int raid0_stop (int minor, struct md_dev *mddev) | |
6706 | +static int raid0_stop (mddev_t *mddev) | |
6707 | { | |
6708 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
6709 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
6710 | ||
6711 | - vfree (data->hash_table); | |
6712 | - vfree (data->strip_zone); | |
6713 | - vfree (data); | |
6714 | + vfree (conf->hash_table); | |
6715 | + conf->hash_table = NULL; | |
6716 | + vfree (conf->strip_zone); | |
6717 | + conf->strip_zone = NULL; | |
6718 | + vfree (conf); | |
6719 | + mddev->private = NULL; | |
6720 | ||
6721 | - MOD_DEC_USE_COUNT; | |
6722 | - return 0; | |
6723 | + MOD_DEC_USE_COUNT; | |
6724 | + return 0; | |
6725 | } | |
6726 | ||
6727 | /* | |
6728 | @@ -167,135 +221,140 @@ | |
6729 | * Of course, those facts may not be valid anymore (and surely won't...) | |
6730 | * Hey guys, there's some work out there ;-) | |
6731 | */ | |
6732 | -static int raid0_map (struct md_dev *mddev, kdev_t *rdev, | |
6733 | +static int raid0_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
6734 | unsigned long *rsector, unsigned long size) | |
6735 | { | |
6736 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
6737 | - static struct raid0_hash *hash; | |
6738 | - struct strip_zone *zone; | |
6739 | - struct real_dev *tmp_dev; | |
6740 | - int blk_in_chunk, factor, chunk, chunk_size; | |
6741 | - long block, rblock; | |
6742 | - | |
6743 | - factor=FACTOR(mddev); | |
6744 | - chunk_size=(1UL << FACTOR_SHIFT(factor)); | |
6745 | - block=*rsector >> 1; | |
6746 | - hash=data->hash_table+(block/data->smallest->size); | |
6747 | - | |
6748 | - if (hash - data->hash_table > data->nr_zones) | |
6749 | - { | |
6750 | - printk(KERN_DEBUG "raid0_map: invalid block %ul\n", block); | |
6751 | - return -1; | |
6752 | - } | |
6753 | - | |
6754 | - /* Sanity check */ | |
6755 | - if ((chunk_size*2)<(*rsector % (chunk_size*2))+size) | |
6756 | - { | |
6757 | - printk ("raid0_convert : can't convert block across chunks or bigger than %dk %ld %ld\n", chunk_size, *rsector, size); | |
6758 | - return (-1); | |
6759 | - } | |
6760 | - | |
6761 | - if (block >= (hash->zone0->size + | |
6762 | - hash->zone0->zone_offset)) | |
6763 | - { | |
6764 | - if (!hash->zone1) | |
6765 | - { | |
6766 | - printk ("raid0_convert : hash->zone1==NULL for block %ld\n", block); | |
6767 | - return (-1); | |
6768 | - } | |
6769 | - | |
6770 | - zone=hash->zone1; | |
6771 | - } | |
6772 | - else | |
6773 | - zone=hash->zone0; | |
6774 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
6775 | + struct raid0_hash *hash; | |
6776 | + struct strip_zone *zone; | |
6777 | + mdk_rdev_t *tmp_dev; | |
6778 | + int blk_in_chunk, chunksize_bits, chunk, chunk_size; | |
6779 | + long block, rblock; | |
6780 | + | |
6781 | + chunk_size = mddev->param.chunk_size >> 10; | |
6782 | + chunksize_bits = ffz(~chunk_size); | |
6783 | + block = *rsector >> 1; | |
6784 | + hash = conf->hash_table + block / conf->smallest->size; | |
6785 | + | |
6786 | + if (hash - conf->hash_table > conf->nr_zones) { | |
6787 | + printk(KERN_DEBUG "raid0_map: invalid block %ul\n", block); | |
6788 | + return -1; | |
6789 | + } | |
6790 | + | |
6791 | + /* Sanity check */ | |
6792 | + if ((chunk_size * 2) < (*rsector % (chunk_size * 2)) + size) | |
6793 | + goto bad_map; | |
6794 | + | |
6795 | + if (!hash) | |
6796 | + goto bad_hash; | |
6797 | + | |
6798 | + if (!hash->zone0) | |
6799 | + goto bad_zone0; | |
6800 | + | |
6801 | + if (block >= (hash->zone0->size + hash->zone0->zone_offset)) { | |
6802 | + if (!hash->zone1) | |
6803 | + goto bad_zone1; | |
6804 | + zone = hash->zone1; | |
6805 | + } else | |
6806 | + zone = hash->zone0; | |
6807 | ||
6808 | - blk_in_chunk=block & (chunk_size -1); | |
6809 | - chunk=(block - zone->zone_offset) / (zone->nb_dev<<FACTOR_SHIFT(factor)); | |
6810 | - tmp_dev=zone->dev[(block >> FACTOR_SHIFT(factor)) % zone->nb_dev]; | |
6811 | - rblock=(chunk << FACTOR_SHIFT(factor)) + blk_in_chunk + zone->dev_offset; | |
6812 | + blk_in_chunk = block & (chunk_size -1); | |
6813 | + chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits); | |
6814 | + tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev]; | |
6815 | + rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset; | |
6816 | ||
6817 | - *rdev=tmp_dev->dev; | |
6818 | - *rsector=rblock<<1; | |
6819 | + *rdev = tmp_dev->dev; | |
6820 | + *rsector = rblock << 1; | |
6821 | ||
6822 | - return (0); | |
6823 | + return 0; | |
6824 | + | |
6825 | +bad_map: | |
6826 | + printk ("raid0_map bug: can't convert block across chunks or bigger than %dk %ld %ld\n", chunk_size, *rsector, size); | |
6827 | + return -1; | |
6828 | +bad_hash: | |
6829 | + printk("raid0_map bug: hash==NULL for block %ld\n", block); | |
6830 | + return -1; | |
6831 | +bad_zone0: | |
6832 | + printk ("raid0_map bug: hash->zone0==NULL for block %ld\n", block); | |
6833 | + return -1; | |
6834 | +bad_zone1: | |
6835 | + printk ("raid0_map bug: hash->zone1==NULL for block %ld\n", block); | |
6836 | + return -1; | |
6837 | } | |
6838 | ||
6839 | ||
6840 | -static int raid0_status (char *page, int minor, struct md_dev *mddev) | |
6841 | +static int raid0_status (char *page, mddev_t *mddev) | |
6842 | { | |
6843 | - int sz=0; | |
6844 | + int sz = 0; | |
6845 | #undef MD_DEBUG | |
6846 | #ifdef MD_DEBUG | |
6847 | - int j, k; | |
6848 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
6849 | + int j, k; | |
6850 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
6851 | ||
6852 | - sz+=sprintf (page+sz, " "); | |
6853 | - for (j=0; j<data->nr_zones; j++) | |
6854 | - { | |
6855 | - sz+=sprintf (page+sz, "[z%d", | |
6856 | - data->hash_table[j].zone0-data->strip_zone); | |
6857 | - if (data->hash_table[j].zone1) | |
6858 | - sz+=sprintf (page+sz, "/z%d] ", | |
6859 | - data->hash_table[j].zone1-data->strip_zone); | |
6860 | - else | |
6861 | - sz+=sprintf (page+sz, "] "); | |
6862 | - } | |
6863 | + sz += sprintf(page + sz, " "); | |
6864 | + for (j = 0; j < conf->nr_zones; j++) { | |
6865 | + sz += sprintf(page + sz, "[z%d", | |
6866 | + conf->hash_table[j].zone0 - conf->strip_zone); | |
6867 | + if (conf->hash_table[j].zone1) | |
6868 | + sz += sprintf(page+sz, "/z%d] ", | |
6869 | + conf->hash_table[j].zone1 - conf->strip_zone); | |
6870 | + else | |
6871 | + sz += sprintf(page+sz, "] "); | |
6872 | + } | |
6873 | ||
6874 | - sz+=sprintf (page+sz, "\n"); | |
6875 | + sz += sprintf(page + sz, "\n"); | |
6876 | ||
6877 | - for (j=0; j<data->nr_strip_zones; j++) | |
6878 | - { | |
6879 | - sz+=sprintf (page+sz, " z%d=[", j); | |
6880 | - for (k=0; k<data->strip_zone[j].nb_dev; k++) | |
6881 | - sz+=sprintf (page+sz, "%s/", | |
6882 | - partition_name(data->strip_zone[j].dev[k]->dev)); | |
6883 | - sz--; | |
6884 | - sz+=sprintf (page+sz, "] zo=%d do=%d s=%d\n", | |
6885 | - data->strip_zone[j].zone_offset, | |
6886 | - data->strip_zone[j].dev_offset, | |
6887 | - data->strip_zone[j].size); | |
6888 | - } | |
6889 | + for (j = 0; j < conf->nr_strip_zones; j++) { | |
6890 | + sz += sprintf(page + sz, " z%d=[", j); | |
6891 | + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | |
6892 | + sz += sprintf (page+sz, "%s/", partition_name( | |
6893 | + conf->strip_zone[j].dev[k]->dev)); | |
6894 | + sz--; | |
6895 | + sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n", | |
6896 | + conf->strip_zone[j].zone_offset, | |
6897 | + conf->strip_zone[j].dev_offset, | |
6898 | + conf->strip_zone[j].size); | |
6899 | + } | |
6900 | #endif | |
6901 | - sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(mddev))); | |
6902 | - return sz; | |
6903 | + sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024); | |
6904 | + return sz; | |
6905 | } | |
6906 | ||
6907 | - | |
6908 | -static struct md_personality raid0_personality= | |
6909 | +static mdk_personality_t raid0_personality= | |
6910 | { | |
6911 | - "raid0", | |
6912 | - raid0_map, | |
6913 | - NULL, /* no special make_request */ | |
6914 | - NULL, /* no special end_request */ | |
6915 | - raid0_run, | |
6916 | - raid0_stop, | |
6917 | - raid0_status, | |
6918 | - NULL, /* no ioctls */ | |
6919 | - 0, | |
6920 | - NULL, /* no error_handler */ | |
6921 | - NULL, /* hot_add_disk */ | |
6922 | - NULL, /* hot_remove_disk */ | |
6923 | - NULL /* mark_spare */ | |
6924 | + "raid0", | |
6925 | + raid0_map, | |
6926 | + NULL, /* no special make_request */ | |
6927 | + NULL, /* no special end_request */ | |
6928 | + raid0_run, | |
6929 | + raid0_stop, | |
6930 | + raid0_status, | |
6931 | + NULL, /* no ioctls */ | |
6932 | + 0, | |
6933 | + NULL, /* no error_handler */ | |
6934 | + NULL, /* no diskop */ | |
6935 | + NULL, /* no stop resync */ | |
6936 | + NULL /* no restart resync */ | |
6937 | }; | |
6938 | ||
6939 | - | |
6940 | #ifndef MODULE | |
6941 | ||
6942 | void raid0_init (void) | |
6943 | { | |
6944 | - register_md_personality (RAID0, &raid0_personality); | |
6945 | + register_md_personality (RAID0, &raid0_personality); | |
6946 | } | |
6947 | ||
6948 | #else | |
6949 | ||
6950 | int init_module (void) | |
6951 | { | |
6952 | - return (register_md_personality (RAID0, &raid0_personality)); | |
6953 | + return (register_md_personality (RAID0, &raid0_personality)); | |
6954 | } | |
6955 | ||
6956 | void cleanup_module (void) | |
6957 | { | |
6958 | - unregister_md_personality (RAID0); | |
6959 | + unregister_md_personality (RAID0); | |
6960 | } | |
6961 | ||
6962 | #endif | |
6963 | + | |
6964 | diff -ruN linux.orig/drivers/block/raid1.c linux-2.2.16/drivers/block/raid1.c | |
6965 | --- linux.orig/drivers/block/raid1.c Thu May 4 02:16:33 2000 | |
6966 | +++ linux-2.2.16/drivers/block/raid1.c Fri Jun 9 11:37:45 2000 | |
6967 | @@ -1,6 +1,6 @@ | |
6968 | -/************************************************************************ | |
6969 | +/* | |
6970 | * raid1.c : Multiple Devices driver for Linux | |
6971 | - * Copyright (C) 1996 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
6972 | + * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
6973 | * | |
6974 | * RAID-1 management functions. | |
6975 | * | |
6976 | @@ -15,50 +15,52 @@ | |
6977 | */ | |
6978 | ||
6979 | #include <linux/module.h> | |
6980 | -#include <linux/locks.h> | |
6981 | #include <linux/malloc.h> | |
6982 | -#include <linux/md.h> | |
6983 | -#include <linux/raid1.h> | |
6984 | -#include <asm/bitops.h> | |
6985 | +#include <linux/raid/raid1.h> | |
6986 | #include <asm/atomic.h> | |
6987 | ||
6988 | #define MAJOR_NR MD_MAJOR | |
6989 | #define MD_DRIVER | |
6990 | #define MD_PERSONALITY | |
6991 | ||
6992 | -/* | |
6993 | - * The following can be used to debug the driver | |
6994 | - */ | |
6995 | -/*#define RAID1_DEBUG*/ | |
6996 | -#ifdef RAID1_DEBUG | |
6997 | -#define PRINTK(x) do { printk x; } while (0); | |
6998 | -#else | |
6999 | -#define PRINTK(x) do { ; } while (0); | |
7000 | -#endif | |
7001 | +#define MAX_LINEAR_SECTORS 128 | |
7002 | ||
7003 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) | |
7004 | #define MIN(a,b) ((a) < (b) ? (a) : (b)) | |
7005 | ||
7006 | -static struct md_personality raid1_personality; | |
7007 | -static struct md_thread *raid1_thread = NULL; | |
7008 | +static mdk_personality_t raid1_personality; | |
7009 | struct buffer_head *raid1_retry_list = NULL; | |
7010 | ||
7011 | -static int __raid1_map (struct md_dev *mddev, kdev_t *rdev, | |
7012 | +static void * raid1_kmalloc (int size) | |
7013 | +{ | |
7014 | + void * ptr; | |
7015 | + /* | |
7016 | + * now we are rather fault tolerant than nice, but | |
7017 | + * there are a couple of places in the RAID code where we | |
7018 | + * simply can not afford to fail an allocation because | |
7019 | + * there is no failure return path (eg. make_request()) | |
7020 | + */ | |
7021 | + while (!(ptr = kmalloc (sizeof (raid1_conf_t), GFP_KERNEL))) | |
7022 | + printk ("raid1: out of memory, retrying...\n"); | |
7023 | + | |
7024 | + memset(ptr, 0, size); | |
7025 | + return ptr; | |
7026 | +} | |
7027 | + | |
7028 | +static int __raid1_map (mddev_t *mddev, kdev_t *rdev, | |
7029 | unsigned long *rsector, unsigned long size) | |
7030 | { | |
7031 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7032 | - int i, n = raid_conf->raid_disks; | |
7033 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7034 | + int i, disks = MD_SB_DISKS; | |
7035 | ||
7036 | /* | |
7037 | * Later we do read balancing on the read side | |
7038 | * now we use the first available disk. | |
7039 | */ | |
7040 | ||
7041 | - PRINTK(("raid1_map().\n")); | |
7042 | - | |
7043 | - for (i=0; i<n; i++) { | |
7044 | - if (raid_conf->mirrors[i].operational) { | |
7045 | - *rdev = raid_conf->mirrors[i].dev; | |
7046 | + for (i = 0; i < disks; i++) { | |
7047 | + if (conf->mirrors[i].operational) { | |
7048 | + *rdev = conf->mirrors[i].dev; | |
7049 | return (0); | |
7050 | } | |
7051 | } | |
7052 | @@ -67,29 +69,29 @@ | |
7053 | return (-1); | |
7054 | } | |
7055 | ||
7056 | -static int raid1_map (struct md_dev *mddev, kdev_t *rdev, | |
7057 | +static int raid1_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
7058 | unsigned long *rsector, unsigned long size) | |
7059 | { | |
7060 | return 0; | |
7061 | } | |
7062 | ||
7063 | -void raid1_reschedule_retry (struct buffer_head *bh) | |
7064 | +static void raid1_reschedule_retry (struct buffer_head *bh) | |
7065 | { | |
7066 | struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
7067 | - | |
7068 | - PRINTK(("raid1_reschedule_retry().\n")); | |
7069 | + mddev_t *mddev = r1_bh->mddev; | |
7070 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7071 | ||
7072 | r1_bh->next_retry = raid1_retry_list; | |
7073 | raid1_retry_list = bh; | |
7074 | - md_wakeup_thread(raid1_thread); | |
7075 | + md_wakeup_thread(conf->thread); | |
7076 | } | |
7077 | ||
7078 | /* | |
7079 | - * raid1_end_buffer_io() is called when we have finished servicing a mirrored | |
7080 | + * raid1_end_bh_io() is called when we have finished servicing a mirrored | |
7081 | * operation and are ready to return a success/failure code to the buffer | |
7082 | * cache layer. | |
7083 | */ | |
7084 | -static inline void raid1_end_buffer_io(struct raid1_bh *r1_bh, int uptodate) | |
7085 | +static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate) | |
7086 | { | |
7087 | struct buffer_head *bh = r1_bh->master_bh; | |
7088 | ||
7089 | @@ -97,8 +99,6 @@ | |
7090 | kfree(r1_bh); | |
7091 | } | |
7092 | ||
7093 | -int raid1_one_error=0; | |
7094 | - | |
7095 | void raid1_end_request (struct buffer_head *bh, int uptodate) | |
7096 | { | |
7097 | struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
7098 | @@ -106,12 +106,7 @@ | |
7099 | ||
7100 | save_flags(flags); | |
7101 | cli(); | |
7102 | - PRINTK(("raid1_end_request().\n")); | |
7103 | ||
7104 | - if (raid1_one_error) { | |
7105 | - raid1_one_error=0; | |
7106 | - uptodate=0; | |
7107 | - } | |
7108 | /* | |
7109 | * this branch is our 'one mirror IO has finished' event handler: | |
7110 | */ | |
7111 | @@ -136,15 +131,11 @@ | |
7112 | */ | |
7113 | ||
7114 | if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) { | |
7115 | - | |
7116 | - PRINTK(("raid1_end_request(), read branch.\n")); | |
7117 | - | |
7118 | /* | |
7119 | * we have only one buffer_head on the read side | |
7120 | */ | |
7121 | if (uptodate) { | |
7122 | - PRINTK(("raid1_end_request(), read branch, uptodate.\n")); | |
7123 | - raid1_end_buffer_io(r1_bh, uptodate); | |
7124 | + raid1_end_bh_io(r1_bh, uptodate); | |
7125 | restore_flags(flags); | |
7126 | return; | |
7127 | } | |
7128 | @@ -152,71 +143,56 @@ | |
7129 | * oops, read error: | |
7130 | */ | |
7131 | printk(KERN_ERR "raid1: %s: rescheduling block %lu\n", | |
7132 | - kdevname(bh->b_dev), bh->b_blocknr); | |
7133 | - raid1_reschedule_retry (bh); | |
7134 | + partition_name(bh->b_dev), bh->b_blocknr); | |
7135 | + raid1_reschedule_retry(bh); | |
7136 | restore_flags(flags); | |
7137 | return; | |
7138 | } | |
7139 | ||
7140 | /* | |
7141 | - * WRITE or WRITEA. | |
7142 | - */ | |
7143 | - PRINTK(("raid1_end_request(), write branch.\n")); | |
7144 | - | |
7145 | - /* | |
7146 | + * WRITE: | |
7147 | + * | |
7148 | * Let's see if all mirrored write operations have finished | |
7149 | - * already [we have irqs off, so we can decrease]: | |
7150 | + * already. | |
7151 | */ | |
7152 | ||
7153 | - if (!--r1_bh->remaining) { | |
7154 | - struct md_dev *mddev = r1_bh->mddev; | |
7155 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7156 | - int i, n = raid_conf->raid_disks; | |
7157 | + if (atomic_dec_and_test(&r1_bh->remaining)) { | |
7158 | + int i, disks = MD_SB_DISKS; | |
7159 | ||
7160 | - PRINTK(("raid1_end_request(), remaining == 0.\n")); | |
7161 | + for ( i = 0; i < disks; i++) | |
7162 | + if (r1_bh->mirror_bh[i]) | |
7163 | + kfree(r1_bh->mirror_bh[i]); | |
7164 | ||
7165 | - for ( i=0; i<n; i++) | |
7166 | - if (r1_bh->mirror_bh[i]) kfree(r1_bh->mirror_bh[i]); | |
7167 | - | |
7168 | - raid1_end_buffer_io(r1_bh, test_bit(BH_Uptodate, &r1_bh->state)); | |
7169 | + raid1_end_bh_io(r1_bh, test_bit(BH_Uptodate, &r1_bh->state)); | |
7170 | } | |
7171 | - else PRINTK(("raid1_end_request(), remaining == %u.\n", r1_bh->remaining)); | |
7172 | restore_flags(flags); | |
7173 | } | |
7174 | ||
7175 | -/* This routine checks if the undelying device is an md device and in that | |
7176 | - * case it maps the blocks before putting the request on the queue | |
7177 | +/* | |
7178 | + * This routine checks if the undelying device is an md device | |
7179 | + * and in that case it maps the blocks before putting the | |
7180 | + * request on the queue | |
7181 | */ | |
7182 | -static inline void | |
7183 | -map_and_make_request (int rw, struct buffer_head *bh) | |
7184 | +static void map_and_make_request (int rw, struct buffer_head *bh) | |
7185 | { | |
7186 | if (MAJOR (bh->b_rdev) == MD_MAJOR) | |
7187 | - md_map (MINOR (bh->b_rdev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9); | |
7188 | + md_map (bh->b_rdev, &bh->b_rdev, | |
7189 | + &bh->b_rsector, bh->b_size >> 9); | |
7190 | clear_bit(BH_Lock, &bh->b_state); | |
7191 | make_request (MAJOR (bh->b_rdev), rw, bh); | |
7192 | } | |
7193 | ||
7194 | -static int | |
7195 | -raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) | |
7196 | +static int raid1_make_request (mddev_t *mddev, int rw, | |
7197 | + struct buffer_head * bh) | |
7198 | { | |
7199 | - | |
7200 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7201 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7202 | struct buffer_head *mirror_bh[MD_SB_DISKS], *bh_req; | |
7203 | struct raid1_bh * r1_bh; | |
7204 | - int n = raid_conf->raid_disks, i, sum_bhs = 0, switch_disks = 0, sectors; | |
7205 | + int disks = MD_SB_DISKS; | |
7206 | + int i, sum_bhs = 0, switch_disks = 0, sectors, lowprio = 0; | |
7207 | struct mirror_info *mirror; | |
7208 | ||
7209 | - PRINTK(("raid1_make_request().\n")); | |
7210 | - | |
7211 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
7212 | - r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL) | |
7213 | - ) ) | |
7214 | - { | |
7215 | - printk ("raid1_make_request(#1): out of memory\n"); | |
7216 | - current->policy |= SCHED_YIELD; | |
7217 | - schedule(); | |
7218 | - } | |
7219 | - memset (r1_bh, 0, sizeof (struct raid1_bh)); | |
7220 | + r1_bh = raid1_kmalloc (sizeof (struct raid1_bh)); | |
7221 | ||
7222 | /* | |
7223 | * make_request() can abort the operation when READA or WRITEA are being | |
7224 | @@ -227,43 +203,65 @@ | |
7225 | if (rw == READA) rw = READ; | |
7226 | if (rw == WRITEA) rw = WRITE; | |
7227 | ||
7228 | - if (rw == WRITE || rw == WRITEA) | |
7229 | - mark_buffer_clean(bh); /* Too early ? */ | |
7230 | + if (rw == WRITE) { | |
7231 | + /* | |
7232 | + * Too early ? | |
7233 | + */ | |
7234 | + mark_buffer_clean(bh); | |
7235 | + /* | |
7236 | + * not too early. we _first_ clean the bh, then we start | |
7237 | + * the IO, then when the IO has finished, we unlock the | |
7238 | + * bh and mark it uptodate. This way we do not miss the | |
7239 | + * case when the bh got dirty again during the IO. | |
7240 | + */ | |
7241 | + } | |
7242 | + | |
7243 | + /* | |
7244 | + * special flag for 'lowprio' reconstruction requests ... | |
7245 | + */ | |
7246 | + if (buffer_lowprio(bh)) | |
7247 | + lowprio = 1; | |
7248 | ||
7249 | /* | |
7250 | - * i think the read and write branch should be separated completely, since we want | |
7251 | - * to do read balancing on the read side for example. Comments? :) --mingo | |
7252 | + * i think the read and write branch should be separated completely, | |
7253 | + * since we want to do read balancing on the read side for example. | |
7254 | + * Comments? :) --mingo | |
7255 | */ | |
7256 | ||
7257 | r1_bh->master_bh=bh; | |
7258 | r1_bh->mddev=mddev; | |
7259 | r1_bh->cmd = rw; | |
7260 | ||
7261 | - if (rw==READ || rw==READA) { | |
7262 | - int last_used = raid_conf->last_used; | |
7263 | - PRINTK(("raid1_make_request(), read branch.\n")); | |
7264 | - mirror = raid_conf->mirrors + last_used; | |
7265 | + if (rw==READ) { | |
7266 | + int last_used = conf->last_used; | |
7267 | + | |
7268 | + /* | |
7269 | + * read balancing logic: | |
7270 | + */ | |
7271 | + mirror = conf->mirrors + last_used; | |
7272 | bh->b_rdev = mirror->dev; | |
7273 | sectors = bh->b_size >> 9; | |
7274 | - if (bh->b_blocknr * sectors == raid_conf->next_sect) { | |
7275 | - raid_conf->sect_count += sectors; | |
7276 | - if (raid_conf->sect_count >= mirror->sect_limit) | |
7277 | + | |
7278 | + if (bh->b_blocknr * sectors == conf->next_sect) { | |
7279 | + conf->sect_count += sectors; | |
7280 | + if (conf->sect_count >= mirror->sect_limit) | |
7281 | switch_disks = 1; | |
7282 | } else | |
7283 | switch_disks = 1; | |
7284 | - raid_conf->next_sect = (bh->b_blocknr + 1) * sectors; | |
7285 | - if (switch_disks) { | |
7286 | - PRINTK(("read-balancing: switching %d -> %d (%d sectors)\n", last_used, mirror->next, raid_conf->sect_count)); | |
7287 | - raid_conf->sect_count = 0; | |
7288 | - last_used = raid_conf->last_used = mirror->next; | |
7289 | + conf->next_sect = (bh->b_blocknr + 1) * sectors; | |
7290 | + /* | |
7291 | + * Do not switch disks if full resync is in progress ... | |
7292 | + */ | |
7293 | + if (switch_disks && !conf->resync_mirrors) { | |
7294 | + conf->sect_count = 0; | |
7295 | + last_used = conf->last_used = mirror->next; | |
7296 | /* | |
7297 | - * Do not switch to write-only disks ... resyncing | |
7298 | - * is in progress | |
7299 | + * Do not switch to write-only disks ... | |
7300 | + * reconstruction is in progress | |
7301 | */ | |
7302 | - while (raid_conf->mirrors[last_used].write_only) | |
7303 | - raid_conf->last_used = raid_conf->mirrors[last_used].next; | |
7304 | + while (conf->mirrors[last_used].write_only) | |
7305 | + conf->last_used = conf->mirrors[last_used].next; | |
7306 | } | |
7307 | - PRINTK (("raid1 read queue: %d %d\n", MAJOR (bh->b_rdev), MINOR (bh->b_rdev))); | |
7308 | bh_req = &r1_bh->bh_req; | |
7309 | memcpy(bh_req, bh, sizeof(*bh)); | |
7310 | bh_req->b_end_io = raid1_end_request; | |
7311 | @@ -273,13 +271,12 @@ | |
7312 | } | |
7313 | ||
7314 | /* | |
7315 | - * WRITE or WRITEA. | |
7316 | + * WRITE: | |
7317 | */ | |
7318 | - PRINTK(("raid1_make_request(n=%d), write branch.\n",n)); | |
7319 | ||
7320 | - for (i = 0; i < n; i++) { | |
7321 | + for (i = 0; i < disks; i++) { | |
7322 | ||
7323 | - if (!raid_conf->mirrors [i].operational) { | |
7324 | + if (!conf->mirrors[i].operational) { | |
7325 | /* | |
7326 | * the r1_bh->mirror_bh[i] pointer remains NULL | |
7327 | */ | |
7328 | @@ -287,89 +284,91 @@ | |
7329 | continue; | |
7330 | } | |
7331 | ||
7332 | + /* | |
7333 | + * special case for reconstruction ... | |
7334 | + */ | |
7335 | + if (lowprio && (i == conf->last_used)) { | |
7336 | + mirror_bh[i] = NULL; | |
7337 | + continue; | |
7338 | + } | |
7339 | + | |
7340 | + /* | |
7341 | + * We should use a private pool (size depending on NR_REQUEST), | |
7342 | + * to avoid writes filling up the memory with bhs | |
7343 | + * | |
7344 | + * Such pools are much faster than kmalloc anyways (so we waste | |
7345 | + * almost nothing by not using the master bh when writing and | |
7346 | + * win alot of cleanness) but for now we are cool enough. --mingo | |
7347 | + * | |
7348 | + * It's safe to sleep here, buffer heads cannot be used in a shared | |
7349 | + * manner in the write branch. Look how we lock the buffer at the | |
7350 | + * beginning of this function to grok the difference ;) | |
7351 | + */ | |
7352 | + mirror_bh[i] = raid1_kmalloc(sizeof(struct buffer_head)); | |
7353 | + /* | |
7354 | + * prepare mirrored bh (fields ordered for max mem throughput): | |
7355 | + */ | |
7356 | + mirror_bh[i]->b_blocknr = bh->b_blocknr; | |
7357 | + mirror_bh[i]->b_dev = bh->b_dev; | |
7358 | + mirror_bh[i]->b_rdev = conf->mirrors[i].dev; | |
7359 | + mirror_bh[i]->b_rsector = bh->b_rsector; | |
7360 | + mirror_bh[i]->b_state = (1<<BH_Req) | (1<<BH_Dirty); | |
7361 | + if (lowprio) | |
7362 | + mirror_bh[i]->b_state |= (1<<BH_LowPrio); | |
7363 | + | |
7364 | + mirror_bh[i]->b_count = 1; | |
7365 | + mirror_bh[i]->b_size = bh->b_size; | |
7366 | + mirror_bh[i]->b_data = bh->b_data; | |
7367 | + mirror_bh[i]->b_list = BUF_LOCKED; | |
7368 | + mirror_bh[i]->b_end_io = raid1_end_request; | |
7369 | + mirror_bh[i]->b_dev_id = r1_bh; | |
7370 | + | |
7371 | + r1_bh->mirror_bh[i] = mirror_bh[i]; | |
7372 | + sum_bhs++; | |
7373 | + } | |
7374 | + | |
7375 | + md_atomic_set(&r1_bh->remaining, sum_bhs); | |
7376 | + | |
7377 | /* | |
7378 | - * We should use a private pool (size depending on NR_REQUEST), | |
7379 | - * to avoid writes filling up the memory with bhs | |
7380 | - * | |
7381 | - * Such pools are much faster than kmalloc anyways (so we waste almost | |
7382 | - * nothing by not using the master bh when writing and win alot of cleanness) | |
7383 | - * | |
7384 | - * but for now we are cool enough. --mingo | |
7385 | - * | |
7386 | - * It's safe to sleep here, buffer heads cannot be used in a shared | |
7387 | - * manner in the write branch. Look how we lock the buffer at the beginning | |
7388 | - * of this function to grok the difference ;) | |
7389 | - */ | |
7390 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
7391 | - mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_KERNEL) | |
7392 | - ) ) | |
7393 | - { | |
7394 | - printk ("raid1_make_request(#2): out of memory\n"); | |
7395 | - current->policy |= SCHED_YIELD; | |
7396 | - schedule(); | |
7397 | - } | |
7398 | - memset (mirror_bh[i], 0, sizeof (struct buffer_head)); | |
7399 | - | |
7400 | - /* | |
7401 | - * prepare mirrored bh (fields ordered for max mem throughput): | |
7402 | - */ | |
7403 | - mirror_bh [i]->b_blocknr = bh->b_blocknr; | |
7404 | - mirror_bh [i]->b_dev = bh->b_dev; | |
7405 | - mirror_bh [i]->b_rdev = raid_conf->mirrors [i].dev; | |
7406 | - mirror_bh [i]->b_rsector = bh->b_rsector; | |
7407 | - mirror_bh [i]->b_state = (1<<BH_Req) | (1<<BH_Dirty); | |
7408 | - mirror_bh [i]->b_count = 1; | |
7409 | - mirror_bh [i]->b_size = bh->b_size; | |
7410 | - mirror_bh [i]->b_data = bh->b_data; | |
7411 | - mirror_bh [i]->b_list = BUF_LOCKED; | |
7412 | - mirror_bh [i]->b_end_io = raid1_end_request; | |
7413 | - mirror_bh [i]->b_dev_id = r1_bh; | |
7414 | - | |
7415 | - r1_bh->mirror_bh[i] = mirror_bh[i]; | |
7416 | - sum_bhs++; | |
7417 | - } | |
7418 | - | |
7419 | - r1_bh->remaining = sum_bhs; | |
7420 | - | |
7421 | - PRINTK(("raid1_make_request(), write branch, sum_bhs=%d.\n",sum_bhs)); | |
7422 | - | |
7423 | - /* | |
7424 | - * We have to be a bit careful about the semaphore above, thats why we | |
7425 | - * start the requests separately. Since kmalloc() could fail, sleep and | |
7426 | - * make_request() can sleep too, this is the safer solution. Imagine, | |
7427 | - * end_request decreasing the semaphore before we could have set it up ... | |
7428 | - * We could play tricks with the semaphore (presetting it and correcting | |
7429 | - * at the end if sum_bhs is not 'n' but we have to do end_request by hand | |
7430 | - * if all requests finish until we had a chance to set up the semaphore | |
7431 | - * correctly ... lots of races). | |
7432 | - */ | |
7433 | - for (i = 0; i < n; i++) | |
7434 | - if (mirror_bh [i] != NULL) | |
7435 | - map_and_make_request (rw, mirror_bh [i]); | |
7436 | + * We have to be a bit careful about the semaphore above, thats | |
7437 | + * why we start the requests separately. Since kmalloc() could | |
7438 | + * fail, sleep and make_request() can sleep too, this is the | |
7439 | + * safer solution. Imagine, end_request decreasing the semaphore | |
7440 | + * before we could have set it up ... We could play tricks with | |
7441 | + * the semaphore (presetting it and correcting at the end if | |
7442 | + * sum_bhs is not 'n' but we have to do end_request by hand if | |
7443 | + * all requests finish until we had a chance to set up the | |
7444 | + * semaphore correctly ... lots of races). | |
7445 | + */ | |
7446 | + for (i = 0; i < disks; i++) | |
7447 | + if (mirror_bh[i]) | |
7448 | + map_and_make_request(rw, mirror_bh[i]); | |
7449 | ||
7450 | return (0); | |
7451 | } | |
7452 | ||
7453 | -static int raid1_status (char *page, int minor, struct md_dev *mddev) | |
7454 | +static int raid1_status (char *page, mddev_t *mddev) | |
7455 | { | |
7456 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7457 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7458 | int sz = 0, i; | |
7459 | ||
7460 | - sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks); | |
7461 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
7462 | - sz += sprintf (page+sz, "%s", raid_conf->mirrors [i].operational ? "U" : "_"); | |
7463 | + sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, | |
7464 | + conf->working_disks); | |
7465 | + for (i = 0; i < conf->raid_disks; i++) | |
7466 | + sz += sprintf (page+sz, "%s", | |
7467 | + conf->mirrors[i].operational ? "U" : "_"); | |
7468 | sz += sprintf (page+sz, "]"); | |
7469 | return sz; | |
7470 | } | |
7471 | ||
7472 | -static void raid1_fix_links (struct raid1_data *raid_conf, int failed_index) | |
7473 | +static void unlink_disk (raid1_conf_t *conf, int target) | |
7474 | { | |
7475 | - int disks = raid_conf->raid_disks; | |
7476 | - int j; | |
7477 | + int disks = MD_SB_DISKS; | |
7478 | + int i; | |
7479 | ||
7480 | - for (j = 0; j < disks; j++) | |
7481 | - if (raid_conf->mirrors [j].next == failed_index) | |
7482 | - raid_conf->mirrors [j].next = raid_conf->mirrors [failed_index].next; | |
7483 | + for (i = 0; i < disks; i++) | |
7484 | + if (conf->mirrors[i].next == target) | |
7485 | + conf->mirrors[i].next = conf->mirrors[target].next; | |
7486 | } | |
7487 | ||
7488 | #define LAST_DISK KERN_ALERT \ | |
7489 | @@ -388,48 +387,53 @@ | |
7490 | #define ALREADY_SYNCING KERN_INFO \ | |
7491 | "raid1: syncing already in progress.\n" | |
7492 | ||
7493 | -static int raid1_error (struct md_dev *mddev, kdev_t dev) | |
7494 | +static void mark_disk_bad (mddev_t *mddev, int failed) | |
7495 | { | |
7496 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7497 | - struct mirror_info *mirror; | |
7498 | - md_superblock_t *sb = mddev->sb; | |
7499 | - int disks = raid_conf->raid_disks; | |
7500 | - int i; | |
7501 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7502 | + struct mirror_info *mirror = conf->mirrors+failed; | |
7503 | + mdp_super_t *sb = mddev->sb; | |
7504 | + | |
7505 | + mirror->operational = 0; | |
7506 | + unlink_disk(conf, failed); | |
7507 | + mark_disk_faulty(sb->disks+mirror->number); | |
7508 | + mark_disk_nonsync(sb->disks+mirror->number); | |
7509 | + mark_disk_inactive(sb->disks+mirror->number); | |
7510 | + sb->active_disks--; | |
7511 | + sb->working_disks--; | |
7512 | + sb->failed_disks++; | |
7513 | + mddev->sb_dirty = 1; | |
7514 | + md_wakeup_thread(conf->thread); | |
7515 | + conf->working_disks--; | |
7516 | + printk (DISK_FAILED, partition_name (mirror->dev), | |
7517 | + conf->working_disks); | |
7518 | +} | |
7519 | ||
7520 | - PRINTK(("raid1_error called\n")); | |
7521 | +static int raid1_error (mddev_t *mddev, kdev_t dev) | |
7522 | +{ | |
7523 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
7524 | + struct mirror_info * mirrors = conf->mirrors; | |
7525 | + int disks = MD_SB_DISKS; | |
7526 | + int i; | |
7527 | ||
7528 | - if (raid_conf->working_disks == 1) { | |
7529 | + if (conf->working_disks == 1) { | |
7530 | /* | |
7531 | * Uh oh, we can do nothing if this is our last disk, but | |
7532 | * first check if this is a queued request for a device | |
7533 | * which has just failed. | |
7534 | */ | |
7535 | - for (i = 0, mirror = raid_conf->mirrors; i < disks; | |
7536 | - i++, mirror++) | |
7537 | - if (mirror->dev == dev && !mirror->operational) | |
7538 | + for (i = 0; i < disks; i++) { | |
7539 | + if (mirrors[i].dev==dev && !mirrors[i].operational) | |
7540 | return 0; | |
7541 | + } | |
7542 | printk (LAST_DISK); | |
7543 | } else { | |
7544 | - /* Mark disk as unusable */ | |
7545 | - for (i = 0, mirror = raid_conf->mirrors; i < disks; | |
7546 | - i++, mirror++) { | |
7547 | - if (mirror->dev == dev && mirror->operational){ | |
7548 | - mirror->operational = 0; | |
7549 | - raid1_fix_links (raid_conf, i); | |
7550 | - sb->disks[mirror->number].state |= | |
7551 | - (1 << MD_FAULTY_DEVICE); | |
7552 | - sb->disks[mirror->number].state &= | |
7553 | - ~(1 << MD_SYNC_DEVICE); | |
7554 | - sb->disks[mirror->number].state &= | |
7555 | - ~(1 << MD_ACTIVE_DEVICE); | |
7556 | - sb->active_disks--; | |
7557 | - sb->working_disks--; | |
7558 | - sb->failed_disks++; | |
7559 | - mddev->sb_dirty = 1; | |
7560 | - md_wakeup_thread(raid1_thread); | |
7561 | - raid_conf->working_disks--; | |
7562 | - printk (DISK_FAILED, kdevname (dev), | |
7563 | - raid_conf->working_disks); | |
7564 | + /* | |
7565 | + * Mark disk as unusable | |
7566 | + */ | |
7567 | + for (i = 0; i < disks; i++) { | |
7568 | + if (mirrors[i].dev==dev && mirrors[i].operational) { | |
7569 | + mark_disk_bad (mddev, i); | |
7570 | + break; | |
7571 | } | |
7572 | } | |
7573 | } | |
7574 | @@ -442,219 +446,396 @@ | |
7575 | #undef START_SYNCING | |
7576 | ||
7577 | /* | |
7578 | - * This is the personality-specific hot-addition routine | |
7579 | + * Insert the spare disk into the drive-ring | |
7580 | */ | |
7581 | +static void link_disk(raid1_conf_t *conf, struct mirror_info *mirror) | |
7582 | +{ | |
7583 | + int j, next; | |
7584 | + int disks = MD_SB_DISKS; | |
7585 | + struct mirror_info *p = conf->mirrors; | |
7586 | + | |
7587 | + for (j = 0; j < disks; j++, p++) | |
7588 | + if (p->operational && !p->write_only) { | |
7589 | + next = p->next; | |
7590 | + p->next = mirror->raid_disk; | |
7591 | + mirror->next = next; | |
7592 | + return; | |
7593 | + } | |
7594 | ||
7595 | -#define NO_SUPERBLOCK KERN_ERR \ | |
7596 | -"raid1: cannot hot-add disk to the array with no RAID superblock\n" | |
7597 | + printk("raid1: bug: no read-operational devices\n"); | |
7598 | +} | |
7599 | ||
7600 | -#define WRONG_LEVEL KERN_ERR \ | |
7601 | -"raid1: hot-add: level of disk is not RAID-1\n" | |
7602 | +static void print_raid1_conf (raid1_conf_t *conf) | |
7603 | +{ | |
7604 | + int i; | |
7605 | + struct mirror_info *tmp; | |
7606 | ||
7607 | -#define HOT_ADD_SUCCEEDED KERN_INFO \ | |
7608 | -"raid1: device %s hot-added\n" | |
7609 | + printk("RAID1 conf printout:\n"); | |
7610 | + if (!conf) { | |
7611 | + printk("(conf==NULL)\n"); | |
7612 | + return; | |
7613 | + } | |
7614 | + printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks, | |
7615 | + conf->raid_disks, conf->nr_disks); | |
7616 | ||
7617 | -static int raid1_hot_add_disk (struct md_dev *mddev, kdev_t dev) | |
7618 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
7619 | + tmp = conf->mirrors + i; | |
7620 | + printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", | |
7621 | + i, tmp->spare,tmp->operational, | |
7622 | + tmp->number,tmp->raid_disk,tmp->used_slot, | |
7623 | + partition_name(tmp->dev)); | |
7624 | + } | |
7625 | +} | |
7626 | + | |
7627 | +static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state) | |
7628 | { | |
7629 | + int err = 0; | |
7630 | + int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; | |
7631 | + raid1_conf_t *conf = mddev->private; | |
7632 | + struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; | |
7633 | unsigned long flags; | |
7634 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
7635 | - struct mirror_info *mirror; | |
7636 | - md_superblock_t *sb = mddev->sb; | |
7637 | - struct real_dev * realdev; | |
7638 | - int n; | |
7639 | + mdp_super_t *sb = mddev->sb; | |
7640 | + mdp_disk_t *failed_desc, *spare_desc, *added_desc; | |
7641 | + | |
7642 | + save_flags(flags); | |
7643 | + cli(); | |
7644 | ||
7645 | + print_raid1_conf(conf); | |
7646 | /* | |
7647 | - * The device has its superblock already read and it was found | |
7648 | - * to be consistent for generic RAID usage. Now we check whether | |
7649 | - * it's usable for RAID-1 hot addition. | |
7650 | + * find the disk ... | |
7651 | */ | |
7652 | + switch (state) { | |
7653 | ||
7654 | - n = mddev->nb_dev++; | |
7655 | - realdev = &mddev->devices[n]; | |
7656 | - if (!realdev->sb) { | |
7657 | - printk (NO_SUPERBLOCK); | |
7658 | - return -EINVAL; | |
7659 | - } | |
7660 | - if (realdev->sb->level != 1) { | |
7661 | - printk (WRONG_LEVEL); | |
7662 | - return -EINVAL; | |
7663 | + case DISKOP_SPARE_ACTIVE: | |
7664 | + | |
7665 | + /* | |
7666 | + * Find the failed disk within the RAID1 configuration ... | |
7667 | + * (this can only be in the first conf->working_disks part) | |
7668 | + */ | |
7669 | + for (i = 0; i < conf->raid_disks; i++) { | |
7670 | + tmp = conf->mirrors + i; | |
7671 | + if ((!tmp->operational && !tmp->spare) || | |
7672 | + !tmp->used_slot) { | |
7673 | + failed_disk = i; | |
7674 | + break; | |
7675 | + } | |
7676 | + } | |
7677 | + /* | |
7678 | + * When we activate a spare disk we _must_ have a disk in | |
7679 | + * the lower (active) part of the array to replace. | |
7680 | + */ | |
7681 | + if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) { | |
7682 | + MD_BUG(); | |
7683 | + err = 1; | |
7684 | + goto abort; | |
7685 | + } | |
7686 | + /* fall through */ | |
7687 | + | |
7688 | + case DISKOP_SPARE_WRITE: | |
7689 | + case DISKOP_SPARE_INACTIVE: | |
7690 | + | |
7691 | + /* | |
7692 | + * Find the spare disk ... (can only be in the 'high' | |
7693 | + * area of the array) | |
7694 | + */ | |
7695 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
7696 | + tmp = conf->mirrors + i; | |
7697 | + if (tmp->spare && tmp->number == (*d)->number) { | |
7698 | + spare_disk = i; | |
7699 | + break; | |
7700 | + } | |
7701 | + } | |
7702 | + if (spare_disk == -1) { | |
7703 | + MD_BUG(); | |
7704 | + err = 1; | |
7705 | + goto abort; | |
7706 | + } | |
7707 | + break; | |
7708 | + | |
7709 | + case DISKOP_HOT_REMOVE_DISK: | |
7710 | + | |
7711 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
7712 | + tmp = conf->mirrors + i; | |
7713 | + if (tmp->used_slot && (tmp->number == (*d)->number)) { | |
7714 | + if (tmp->operational) { | |
7715 | + err = -EBUSY; | |
7716 | + goto abort; | |
7717 | + } | |
7718 | + removed_disk = i; | |
7719 | + break; | |
7720 | + } | |
7721 | + } | |
7722 | + if (removed_disk == -1) { | |
7723 | + MD_BUG(); | |
7724 | + err = 1; | |
7725 | + goto abort; | |
7726 | + } | |
7727 | + break; | |
7728 | + | |
7729 | + case DISKOP_HOT_ADD_DISK: | |
7730 | + | |
7731 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
7732 | + tmp = conf->mirrors + i; | |
7733 | + if (!tmp->used_slot) { | |
7734 | + added_disk = i; | |
7735 | + break; | |
7736 | + } | |
7737 | + } | |
7738 | + if (added_disk == -1) { | |
7739 | + MD_BUG(); | |
7740 | + err = 1; | |
7741 | + goto abort; | |
7742 | + } | |
7743 | + break; | |
7744 | } | |
7745 | - /* FIXME: are there other things left we could sanity-check? */ | |
7746 | ||
7747 | + switch (state) { | |
7748 | /* | |
7749 | - * We have to disable interrupts, as our RAID-1 state is used | |
7750 | - * from irq handlers as well. | |
7751 | + * Switch the spare disk to write-only mode: | |
7752 | */ | |
7753 | - save_flags(flags); | |
7754 | - cli(); | |
7755 | + case DISKOP_SPARE_WRITE: | |
7756 | + sdisk = conf->mirrors + spare_disk; | |
7757 | + sdisk->operational = 1; | |
7758 | + sdisk->write_only = 1; | |
7759 | + break; | |
7760 | + /* | |
7761 | + * Deactivate a spare disk: | |
7762 | + */ | |
7763 | + case DISKOP_SPARE_INACTIVE: | |
7764 | + sdisk = conf->mirrors + spare_disk; | |
7765 | + sdisk->operational = 0; | |
7766 | + sdisk->write_only = 0; | |
7767 | + break; | |
7768 | + /* | |
7769 | + * Activate (mark read-write) the (now sync) spare disk, | |
7770 | + * which means we switch it's 'raid position' (->raid_disk) | |
7771 | + * with the failed disk. (only the first 'conf->nr_disks' | |
7772 | + * slots are used for 'real' disks and we must preserve this | |
7773 | + * property) | |
7774 | + */ | |
7775 | + case DISKOP_SPARE_ACTIVE: | |
7776 | ||
7777 | - raid_conf->raid_disks++; | |
7778 | - mirror = raid_conf->mirrors+n; | |
7779 | + sdisk = conf->mirrors + spare_disk; | |
7780 | + fdisk = conf->mirrors + failed_disk; | |
7781 | ||
7782 | - mirror->number=n; | |
7783 | - mirror->raid_disk=n; | |
7784 | - mirror->dev=dev; | |
7785 | - mirror->next=0; /* FIXME */ | |
7786 | - mirror->sect_limit=128; | |
7787 | - | |
7788 | - mirror->operational=0; | |
7789 | - mirror->spare=1; | |
7790 | - mirror->write_only=0; | |
7791 | - | |
7792 | - sb->disks[n].state |= (1 << MD_FAULTY_DEVICE); | |
7793 | - sb->disks[n].state &= ~(1 << MD_SYNC_DEVICE); | |
7794 | - sb->disks[n].state &= ~(1 << MD_ACTIVE_DEVICE); | |
7795 | - sb->nr_disks++; | |
7796 | - sb->spare_disks++; | |
7797 | + spare_desc = &sb->disks[sdisk->number]; | |
7798 | + failed_desc = &sb->disks[fdisk->number]; | |
7799 | ||
7800 | - restore_flags(flags); | |
7801 | + if (spare_desc != *d) { | |
7802 | + MD_BUG(); | |
7803 | + err = 1; | |
7804 | + goto abort; | |
7805 | + } | |
7806 | ||
7807 | - md_update_sb(MINOR(dev)); | |
7808 | + if (spare_desc->raid_disk != sdisk->raid_disk) { | |
7809 | + MD_BUG(); | |
7810 | + err = 1; | |
7811 | + goto abort; | |
7812 | + } | |
7813 | + | |
7814 | + if (sdisk->raid_disk != spare_disk) { | |
7815 | + MD_BUG(); | |
7816 | + err = 1; | |
7817 | + goto abort; | |
7818 | + } | |
7819 | ||
7820 | - printk (HOT_ADD_SUCCEEDED, kdevname(realdev->dev)); | |
7821 | + if (failed_desc->raid_disk != fdisk->raid_disk) { | |
7822 | + MD_BUG(); | |
7823 | + err = 1; | |
7824 | + goto abort; | |
7825 | + } | |
7826 | ||
7827 | - return 0; | |
7828 | -} | |
7829 | + if (fdisk->raid_disk != failed_disk) { | |
7830 | + MD_BUG(); | |
7831 | + err = 1; | |
7832 | + goto abort; | |
7833 | + } | |
7834 | ||
7835 | -#undef NO_SUPERBLOCK | |
7836 | -#undef WRONG_LEVEL | |
7837 | -#undef HOT_ADD_SUCCEEDED | |
7838 | + /* | |
7839 | + * do the switch finally | |
7840 | + */ | |
7841 | + xchg_values(*spare_desc, *failed_desc); | |
7842 | + xchg_values(*fdisk, *sdisk); | |
7843 | ||
7844 | -/* | |
7845 | - * Insert the spare disk into the drive-ring | |
7846 | - */ | |
7847 | -static void add_ring(struct raid1_data *raid_conf, struct mirror_info *mirror) | |
7848 | -{ | |
7849 | - int j, next; | |
7850 | - struct mirror_info *p = raid_conf->mirrors; | |
7851 | + /* | |
7852 | + * (careful, 'failed' and 'spare' are switched from now on) | |
7853 | + * | |
7854 | + * we want to preserve linear numbering and we want to | |
7855 | + * give the proper raid_disk number to the now activated | |
7856 | + * disk. (this means we switch back these values) | |
7857 | + */ | |
7858 | + | |
7859 | + xchg_values(spare_desc->raid_disk, failed_desc->raid_disk); | |
7860 | + xchg_values(sdisk->raid_disk, fdisk->raid_disk); | |
7861 | + xchg_values(spare_desc->number, failed_desc->number); | |
7862 | + xchg_values(sdisk->number, fdisk->number); | |
7863 | ||
7864 | - for (j = 0; j < raid_conf->raid_disks; j++, p++) | |
7865 | - if (p->operational && !p->write_only) { | |
7866 | - next = p->next; | |
7867 | - p->next = mirror->raid_disk; | |
7868 | - mirror->next = next; | |
7869 | - return; | |
7870 | - } | |
7871 | - printk("raid1: bug: no read-operational devices\n"); | |
7872 | -} | |
7873 | + *d = failed_desc; | |
7874 | ||
7875 | -static int raid1_mark_spare(struct md_dev *mddev, md_descriptor_t *spare, | |
7876 | - int state) | |
7877 | -{ | |
7878 | - int i = 0, failed_disk = -1; | |
7879 | - struct raid1_data *raid_conf = mddev->private; | |
7880 | - struct mirror_info *mirror = raid_conf->mirrors; | |
7881 | - md_descriptor_t *descriptor; | |
7882 | - unsigned long flags; | |
7883 | + if (sdisk->dev == MKDEV(0,0)) | |
7884 | + sdisk->used_slot = 0; | |
7885 | + /* | |
7886 | + * this really activates the spare. | |
7887 | + */ | |
7888 | + fdisk->spare = 0; | |
7889 | + fdisk->write_only = 0; | |
7890 | + link_disk(conf, fdisk); | |
7891 | ||
7892 | - for (i = 0; i < MD_SB_DISKS; i++, mirror++) { | |
7893 | - if (mirror->spare && mirror->number == spare->number) | |
7894 | - goto found; | |
7895 | - } | |
7896 | - return 1; | |
7897 | -found: | |
7898 | - for (i = 0, mirror = raid_conf->mirrors; i < raid_conf->raid_disks; | |
7899 | - i++, mirror++) | |
7900 | - if (!mirror->operational) | |
7901 | - failed_disk = i; | |
7902 | + /* | |
7903 | + * if we activate a spare, we definitely replace a | |
7904 | + * non-operational disk slot in the 'low' area of | |
7905 | + * the disk array. | |
7906 | + */ | |
7907 | ||
7908 | - save_flags(flags); | |
7909 | - cli(); | |
7910 | - switch (state) { | |
7911 | - case SPARE_WRITE: | |
7912 | - mirror->operational = 1; | |
7913 | - mirror->write_only = 1; | |
7914 | - raid_conf->raid_disks = MAX(raid_conf->raid_disks, | |
7915 | - mirror->raid_disk + 1); | |
7916 | - break; | |
7917 | - case SPARE_INACTIVE: | |
7918 | - mirror->operational = 0; | |
7919 | - mirror->write_only = 0; | |
7920 | - break; | |
7921 | - case SPARE_ACTIVE: | |
7922 | - mirror->spare = 0; | |
7923 | - mirror->write_only = 0; | |
7924 | - raid_conf->working_disks++; | |
7925 | - add_ring(raid_conf, mirror); | |
7926 | - | |
7927 | - if (failed_disk != -1) { | |
7928 | - descriptor = &mddev->sb->disks[raid_conf->mirrors[failed_disk].number]; | |
7929 | - i = spare->raid_disk; | |
7930 | - spare->raid_disk = descriptor->raid_disk; | |
7931 | - descriptor->raid_disk = i; | |
7932 | - } | |
7933 | - break; | |
7934 | - default: | |
7935 | - printk("raid1_mark_spare: bug: state == %d\n", state); | |
7936 | - restore_flags(flags); | |
7937 | - return 1; | |
7938 | + conf->working_disks++; | |
7939 | + | |
7940 | + break; | |
7941 | + | |
7942 | + case DISKOP_HOT_REMOVE_DISK: | |
7943 | + rdisk = conf->mirrors + removed_disk; | |
7944 | + | |
7945 | + if (rdisk->spare && (removed_disk < conf->raid_disks)) { | |
7946 | + MD_BUG(); | |
7947 | + err = 1; | |
7948 | + goto abort; | |
7949 | + } | |
7950 | + rdisk->dev = MKDEV(0,0); | |
7951 | + rdisk->used_slot = 0; | |
7952 | + conf->nr_disks--; | |
7953 | + break; | |
7954 | + | |
7955 | + case DISKOP_HOT_ADD_DISK: | |
7956 | + adisk = conf->mirrors + added_disk; | |
7957 | + added_desc = *d; | |
7958 | + | |
7959 | + if (added_disk != added_desc->number) { | |
7960 | + MD_BUG(); | |
7961 | + err = 1; | |
7962 | + goto abort; | |
7963 | + } | |
7964 | + | |
7965 | + adisk->number = added_desc->number; | |
7966 | + adisk->raid_disk = added_desc->raid_disk; | |
7967 | + adisk->dev = MKDEV(added_desc->major,added_desc->minor); | |
7968 | + | |
7969 | + adisk->operational = 0; | |
7970 | + adisk->write_only = 0; | |
7971 | + adisk->spare = 1; | |
7972 | + adisk->used_slot = 1; | |
7973 | + conf->nr_disks++; | |
7974 | + | |
7975 | + break; | |
7976 | + | |
7977 | + default: | |
7978 | + MD_BUG(); | |
7979 | + err = 1; | |
7980 | + goto abort; | |
7981 | } | |
7982 | +abort: | |
7983 | restore_flags(flags); | |
7984 | - return 0; | |
7985 | + print_raid1_conf(conf); | |
7986 | + return err; | |
7987 | } | |
7988 | ||
7989 | + | |
7990 | +#define IO_ERROR KERN_ALERT \ | |
7991 | +"raid1: %s: unrecoverable I/O read error for block %lu\n" | |
7992 | + | |
7993 | +#define REDIRECT_SECTOR KERN_ERR \ | |
7994 | +"raid1: %s: redirecting sector %lu to another mirror\n" | |
7995 | + | |
7996 | /* | |
7997 | * This is a kernel thread which: | |
7998 | * | |
7999 | * 1. Retries failed read operations on working mirrors. | |
8000 | * 2. Updates the raid superblock when problems encounter. | |
8001 | */ | |
8002 | -void raid1d (void *data) | |
8003 | +static void raid1d (void *data) | |
8004 | { | |
8005 | struct buffer_head *bh; | |
8006 | kdev_t dev; | |
8007 | unsigned long flags; | |
8008 | - struct raid1_bh * r1_bh; | |
8009 | - struct md_dev *mddev; | |
8010 | + struct raid1_bh *r1_bh; | |
8011 | + mddev_t *mddev; | |
8012 | ||
8013 | - PRINTK(("raid1d() active\n")); | |
8014 | - save_flags(flags); | |
8015 | - cli(); | |
8016 | while (raid1_retry_list) { | |
8017 | + save_flags(flags); | |
8018 | + cli(); | |
8019 | bh = raid1_retry_list; | |
8020 | r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
8021 | raid1_retry_list = r1_bh->next_retry; | |
8022 | restore_flags(flags); | |
8023 | ||
8024 | - mddev = md_dev + MINOR(bh->b_dev); | |
8025 | + mddev = kdev_to_mddev(bh->b_dev); | |
8026 | if (mddev->sb_dirty) { | |
8027 | - printk("dirty sb detected, updating.\n"); | |
8028 | + printk(KERN_INFO "dirty sb detected, updating.\n"); | |
8029 | mddev->sb_dirty = 0; | |
8030 | - md_update_sb(MINOR(bh->b_dev)); | |
8031 | + md_update_sb(mddev); | |
8032 | } | |
8033 | dev = bh->b_rdev; | |
8034 | - __raid1_map (md_dev + MINOR(bh->b_dev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9); | |
8035 | + __raid1_map (mddev, &bh->b_rdev, &bh->b_rsector, | |
8036 | + bh->b_size >> 9); | |
8037 | if (bh->b_rdev == dev) { | |
8038 | - printk (KERN_ALERT | |
8039 | - "raid1: %s: unrecoverable I/O read error for block %lu\n", | |
8040 | - kdevname(bh->b_dev), bh->b_blocknr); | |
8041 | - raid1_end_buffer_io(r1_bh, 0); | |
8042 | + printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr); | |
8043 | + raid1_end_bh_io(r1_bh, 0); | |
8044 | } else { | |
8045 | - printk (KERN_ERR "raid1: %s: redirecting sector %lu to another mirror\n", | |
8046 | - kdevname(bh->b_dev), bh->b_blocknr); | |
8047 | + printk (REDIRECT_SECTOR, | |
8048 | + partition_name(bh->b_dev), bh->b_blocknr); | |
8049 | map_and_make_request (r1_bh->cmd, bh); | |
8050 | } | |
8051 | - cli(); | |
8052 | } | |
8053 | - restore_flags(flags); | |
8054 | +} | |
8055 | +#undef IO_ERROR | |
8056 | +#undef REDIRECT_SECTOR | |
8057 | + | |
8058 | +/* | |
8059 | + * Private kernel thread to reconstruct mirrors after an unclean | |
8060 | + * shutdown. | |
8061 | + */ | |
8062 | +static void raid1syncd (void *data) | |
8063 | +{ | |
8064 | + raid1_conf_t *conf = data; | |
8065 | + mddev_t *mddev = conf->mddev; | |
8066 | + | |
8067 | + if (!conf->resync_mirrors) | |
8068 | + return; | |
8069 | + if (conf->resync_mirrors == 2) | |
8070 | + return; | |
8071 | + down(&mddev->recovery_sem); | |
8072 | + if (md_do_sync(mddev, NULL)) { | |
8073 | + up(&mddev->recovery_sem); | |
8074 | + return; | |
8075 | + } | |
8076 | + /* | |
8077 | + * Only if everything went Ok. | |
8078 | + */ | |
8079 | + conf->resync_mirrors = 0; | |
8080 | + up(&mddev->recovery_sem); | |
8081 | } | |
8082 | ||
8083 | + | |
8084 | /* | |
8085 | * This will catch the scenario in which one of the mirrors was | |
8086 | * mounted as a normal device rather than as a part of a raid set. | |
8087 | + * | |
8088 | + * check_consistency is very personality-dependent, eg. RAID5 cannot | |
8089 | + * do this check, it uses another method. | |
8090 | */ | |
8091 | -static int __check_consistency (struct md_dev *mddev, int row) | |
8092 | +static int __check_consistency (mddev_t *mddev, int row) | |
8093 | { | |
8094 | - struct raid1_data *raid_conf = mddev->private; | |
8095 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
8096 | + int disks = MD_SB_DISKS; | |
8097 | kdev_t dev; | |
8098 | struct buffer_head *bh = NULL; | |
8099 | int i, rc = 0; | |
8100 | char *buffer = NULL; | |
8101 | ||
8102 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
8103 | - if (!raid_conf->mirrors[i].operational) | |
8104 | + for (i = 0; i < disks; i++) { | |
8105 | + printk("(checking disk %d)\n",i); | |
8106 | + if (!conf->mirrors[i].operational) | |
8107 | continue; | |
8108 | - dev = raid_conf->mirrors[i].dev; | |
8109 | + printk("(really checking disk %d)\n",i); | |
8110 | + dev = conf->mirrors[i].dev; | |
8111 | set_blocksize(dev, 4096); | |
8112 | if ((bh = bread(dev, row / 4, 4096)) == NULL) | |
8113 | break; | |
8114 | @@ -683,167 +864,342 @@ | |
8115 | return rc; | |
8116 | } | |
8117 | ||
8118 | -static int check_consistency (struct md_dev *mddev) | |
8119 | +static int check_consistency (mddev_t *mddev) | |
8120 | { | |
8121 | - int size = mddev->sb->size; | |
8122 | - int row; | |
8123 | + if (__check_consistency(mddev, 0)) | |
8124 | +/* | |
8125 | + * we do not do this currently, as it's perfectly possible to | |
8126 | + * have an inconsistent array when it's freshly created. Only | |
8127 | + * newly written data has to be consistent. | |
8128 | + */ | |
8129 | + return 0; | |
8130 | ||
8131 | - for (row = 0; row < size; row += size / 8) | |
8132 | - if (__check_consistency(mddev, row)) | |
8133 | - return 1; | |
8134 | return 0; | |
8135 | } | |
8136 | ||
8137 | -static int raid1_run (int minor, struct md_dev *mddev) | |
8138 | +#define INVALID_LEVEL KERN_WARNING \ | |
8139 | +"raid1: md%d: raid level not set to mirroring (%d)\n" | |
8140 | + | |
8141 | +#define NO_SB KERN_ERR \ | |
8142 | +"raid1: disabled mirror %s (couldn't access raid superblock)\n" | |
8143 | + | |
8144 | +#define ERRORS KERN_ERR \ | |
8145 | +"raid1: disabled mirror %s (errors detected)\n" | |
8146 | + | |
8147 | +#define NOT_IN_SYNC KERN_ERR \ | |
8148 | +"raid1: disabled mirror %s (not in sync)\n" | |
8149 | + | |
8150 | +#define INCONSISTENT KERN_ERR \ | |
8151 | +"raid1: disabled mirror %s (inconsistent descriptor)\n" | |
8152 | + | |
8153 | +#define ALREADY_RUNNING KERN_ERR \ | |
8154 | +"raid1: disabled mirror %s (mirror %d already operational)\n" | |
8155 | + | |
8156 | +#define OPERATIONAL KERN_INFO \ | |
8157 | +"raid1: device %s operational as mirror %d\n" | |
8158 | + | |
8159 | +#define MEM_ERROR KERN_ERR \ | |
8160 | +"raid1: couldn't allocate memory for md%d\n" | |
8161 | + | |
8162 | +#define SPARE KERN_INFO \ | |
8163 | +"raid1: spare disk %s\n" | |
8164 | + | |
8165 | +#define NONE_OPERATIONAL KERN_ERR \ | |
8166 | +"raid1: no operational mirrors for md%d\n" | |
8167 | + | |
8168 | +#define RUNNING_CKRAID KERN_ERR \ | |
8169 | +"raid1: detected mirror differences -- running resync\n" | |
8170 | + | |
8171 | +#define ARRAY_IS_ACTIVE KERN_INFO \ | |
8172 | +"raid1: raid set md%d active with %d out of %d mirrors\n" | |
8173 | + | |
8174 | +#define THREAD_ERROR KERN_ERR \ | |
8175 | +"raid1: couldn't allocate thread for md%d\n" | |
8176 | + | |
8177 | +#define START_RESYNC KERN_WARNING \ | |
8178 | +"raid1: raid set md%d not clean; reconstructing mirrors\n" | |
8179 | + | |
8180 | +static int raid1_run (mddev_t *mddev) | |
8181 | { | |
8182 | - struct raid1_data *raid_conf; | |
8183 | - int i, j, raid_disk; | |
8184 | - md_superblock_t *sb = mddev->sb; | |
8185 | - md_descriptor_t *descriptor; | |
8186 | - struct real_dev *realdev; | |
8187 | + raid1_conf_t *conf; | |
8188 | + int i, j, disk_idx; | |
8189 | + struct mirror_info *disk; | |
8190 | + mdp_super_t *sb = mddev->sb; | |
8191 | + mdp_disk_t *descriptor; | |
8192 | + mdk_rdev_t *rdev; | |
8193 | + struct md_list_head *tmp; | |
8194 | + int start_recovery = 0; | |
8195 | ||
8196 | MOD_INC_USE_COUNT; | |
8197 | ||
8198 | if (sb->level != 1) { | |
8199 | - printk("raid1: %s: raid level not set to mirroring (%d)\n", | |
8200 | - kdevname(MKDEV(MD_MAJOR, minor)), sb->level); | |
8201 | - MOD_DEC_USE_COUNT; | |
8202 | - return -EIO; | |
8203 | - } | |
8204 | - /**** | |
8205 | - * copy the now verified devices into our private RAID1 bookkeeping | |
8206 | - * area. [whatever we allocate in raid1_run(), should be freed in | |
8207 | - * raid1_stop()] | |
8208 | + printk(INVALID_LEVEL, mdidx(mddev), sb->level); | |
8209 | + goto out; | |
8210 | + } | |
8211 | + /* | |
8212 | + * copy the already verified devices into our private RAID1 | |
8213 | + * bookkeeping area. [whatever we allocate in raid1_run(), | |
8214 | + * should be freed in raid1_stop()] | |
8215 | */ | |
8216 | ||
8217 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
8218 | - mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL) | |
8219 | - ) ) | |
8220 | - { | |
8221 | - printk ("raid1_run(): out of memory\n"); | |
8222 | - current->policy |= SCHED_YIELD; | |
8223 | - schedule(); | |
8224 | - } | |
8225 | - raid_conf = mddev->private; | |
8226 | - memset(raid_conf, 0, sizeof(*raid_conf)); | |
8227 | - | |
8228 | - PRINTK(("raid1_run(%d) called.\n", minor)); | |
8229 | - | |
8230 | - for (i = 0; i < mddev->nb_dev; i++) { | |
8231 | - realdev = &mddev->devices[i]; | |
8232 | - if (!realdev->sb) { | |
8233 | - printk(KERN_ERR "raid1: disabled mirror %s (couldn't access raid superblock)\n", kdevname(realdev->dev)); | |
8234 | + conf = raid1_kmalloc(sizeof(raid1_conf_t)); | |
8235 | + mddev->private = conf; | |
8236 | + if (!conf) { | |
8237 | + printk(MEM_ERROR, mdidx(mddev)); | |
8238 | + goto out; | |
8239 | + } | |
8240 | + | |
8241 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
8242 | + if (rdev->faulty) { | |
8243 | + printk(ERRORS, partition_name(rdev->dev)); | |
8244 | + } else { | |
8245 | + if (!rdev->sb) { | |
8246 | + MD_BUG(); | |
8247 | + continue; | |
8248 | + } | |
8249 | + } | |
8250 | + if (rdev->desc_nr == -1) { | |
8251 | + MD_BUG(); | |
8252 | continue; | |
8253 | } | |
8254 | - | |
8255 | - /* | |
8256 | - * This is important -- we are using the descriptor on | |
8257 | - * the disk only to get a pointer to the descriptor on | |
8258 | - * the main superblock, which might be more recent. | |
8259 | - */ | |
8260 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
8261 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) { | |
8262 | - printk(KERN_ERR "raid1: disabled mirror %s (errors detected)\n", kdevname(realdev->dev)); | |
8263 | + descriptor = &sb->disks[rdev->desc_nr]; | |
8264 | + disk_idx = descriptor->raid_disk; | |
8265 | + disk = conf->mirrors + disk_idx; | |
8266 | + | |
8267 | + if (disk_faulty(descriptor)) { | |
8268 | + disk->number = descriptor->number; | |
8269 | + disk->raid_disk = disk_idx; | |
8270 | + disk->dev = rdev->dev; | |
8271 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
8272 | + disk->operational = 0; | |
8273 | + disk->write_only = 0; | |
8274 | + disk->spare = 0; | |
8275 | + disk->used_slot = 1; | |
8276 | continue; | |
8277 | } | |
8278 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) { | |
8279 | - if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) { | |
8280 | - printk(KERN_ERR "raid1: disabled mirror %s (not in sync)\n", kdevname(realdev->dev)); | |
8281 | + if (disk_active(descriptor)) { | |
8282 | + if (!disk_sync(descriptor)) { | |
8283 | + printk(NOT_IN_SYNC, | |
8284 | + partition_name(rdev->dev)); | |
8285 | continue; | |
8286 | } | |
8287 | - raid_disk = descriptor->raid_disk; | |
8288 | - if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) { | |
8289 | - printk(KERN_ERR "raid1: disabled mirror %s (inconsistent descriptor)\n", kdevname(realdev->dev)); | |
8290 | + if ((descriptor->number > MD_SB_DISKS) || | |
8291 | + (disk_idx > sb->raid_disks)) { | |
8292 | + | |
8293 | + printk(INCONSISTENT, | |
8294 | + partition_name(rdev->dev)); | |
8295 | continue; | |
8296 | } | |
8297 | - if (raid_conf->mirrors[raid_disk].operational) { | |
8298 | - printk(KERN_ERR "raid1: disabled mirror %s (mirror %d already operational)\n", kdevname(realdev->dev), raid_disk); | |
8299 | + if (disk->operational) { | |
8300 | + printk(ALREADY_RUNNING, | |
8301 | + partition_name(rdev->dev), | |
8302 | + disk_idx); | |
8303 | continue; | |
8304 | } | |
8305 | - printk(KERN_INFO "raid1: device %s operational as mirror %d\n", kdevname(realdev->dev), raid_disk); | |
8306 | - raid_conf->mirrors[raid_disk].number = descriptor->number; | |
8307 | - raid_conf->mirrors[raid_disk].raid_disk = raid_disk; | |
8308 | - raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev; | |
8309 | - raid_conf->mirrors[raid_disk].operational = 1; | |
8310 | - raid_conf->mirrors[raid_disk].sect_limit = 128; | |
8311 | - raid_conf->working_disks++; | |
8312 | + printk(OPERATIONAL, partition_name(rdev->dev), | |
8313 | + disk_idx); | |
8314 | + disk->number = descriptor->number; | |
8315 | + disk->raid_disk = disk_idx; | |
8316 | + disk->dev = rdev->dev; | |
8317 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
8318 | + disk->operational = 1; | |
8319 | + disk->write_only = 0; | |
8320 | + disk->spare = 0; | |
8321 | + disk->used_slot = 1; | |
8322 | + conf->working_disks++; | |
8323 | } else { | |
8324 | /* | |
8325 | * Must be a spare disk .. | |
8326 | */ | |
8327 | - printk(KERN_INFO "raid1: spare disk %s\n", kdevname(realdev->dev)); | |
8328 | - raid_disk = descriptor->raid_disk; | |
8329 | - raid_conf->mirrors[raid_disk].number = descriptor->number; | |
8330 | - raid_conf->mirrors[raid_disk].raid_disk = raid_disk; | |
8331 | - raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev; | |
8332 | - raid_conf->mirrors[raid_disk].sect_limit = 128; | |
8333 | - | |
8334 | - raid_conf->mirrors[raid_disk].operational = 0; | |
8335 | - raid_conf->mirrors[raid_disk].write_only = 0; | |
8336 | - raid_conf->mirrors[raid_disk].spare = 1; | |
8337 | - } | |
8338 | - } | |
8339 | - if (!raid_conf->working_disks) { | |
8340 | - printk(KERN_ERR "raid1: no operational mirrors for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
8341 | - kfree(raid_conf); | |
8342 | - mddev->private = NULL; | |
8343 | - MOD_DEC_USE_COUNT; | |
8344 | - return -EIO; | |
8345 | - } | |
8346 | - | |
8347 | - raid_conf->raid_disks = sb->raid_disks; | |
8348 | - raid_conf->mddev = mddev; | |
8349 | - | |
8350 | - for (j = 0; !raid_conf->mirrors[j].operational; j++); | |
8351 | - raid_conf->last_used = j; | |
8352 | - for (i = raid_conf->raid_disks - 1; i >= 0; i--) { | |
8353 | - if (raid_conf->mirrors[i].operational) { | |
8354 | - PRINTK(("raid_conf->mirrors[%d].next == %d\n", i, j)); | |
8355 | - raid_conf->mirrors[i].next = j; | |
8356 | + printk(SPARE, partition_name(rdev->dev)); | |
8357 | + disk->number = descriptor->number; | |
8358 | + disk->raid_disk = disk_idx; | |
8359 | + disk->dev = rdev->dev; | |
8360 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
8361 | + disk->operational = 0; | |
8362 | + disk->write_only = 0; | |
8363 | + disk->spare = 1; | |
8364 | + disk->used_slot = 1; | |
8365 | + } | |
8366 | + } | |
8367 | + if (!conf->working_disks) { | |
8368 | + printk(NONE_OPERATIONAL, mdidx(mddev)); | |
8369 | + goto out_free_conf; | |
8370 | + } | |
8371 | + | |
8372 | + conf->raid_disks = sb->raid_disks; | |
8373 | + conf->nr_disks = sb->nr_disks; | |
8374 | + conf->mddev = mddev; | |
8375 | + | |
8376 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
8377 | + | |
8378 | + descriptor = sb->disks+i; | |
8379 | + disk_idx = descriptor->raid_disk; | |
8380 | + disk = conf->mirrors + disk_idx; | |
8381 | + | |
8382 | + if (disk_faulty(descriptor) && (disk_idx < conf->raid_disks) && | |
8383 | + !disk->used_slot) { | |
8384 | + | |
8385 | + disk->number = descriptor->number; | |
8386 | + disk->raid_disk = disk_idx; | |
8387 | + disk->dev = MKDEV(0,0); | |
8388 | + | |
8389 | + disk->operational = 0; | |
8390 | + disk->write_only = 0; | |
8391 | + disk->spare = 0; | |
8392 | + disk->used_slot = 1; | |
8393 | + } | |
8394 | + } | |
8395 | + | |
8396 | + /* | |
8397 | + * find the first working one and use it as a starting point | |
8398 | + * to read balancing. | |
8399 | + */ | |
8400 | + for (j = 0; !conf->mirrors[j].operational; j++) | |
8401 | + /* nothing */; | |
8402 | + conf->last_used = j; | |
8403 | + | |
8404 | + /* | |
8405 | + * initialize the 'working disks' list. | |
8406 | + */ | |
8407 | + for (i = conf->raid_disks - 1; i >= 0; i--) { | |
8408 | + if (conf->mirrors[i].operational) { | |
8409 | + conf->mirrors[i].next = j; | |
8410 | j = i; | |
8411 | } | |
8412 | } | |
8413 | ||
8414 | - if (check_consistency(mddev)) { | |
8415 | - printk(KERN_ERR "raid1: detected mirror differences -- run ckraid\n"); | |
8416 | - sb->state |= 1 << MD_SB_ERRORS; | |
8417 | - kfree(raid_conf); | |
8418 | - mddev->private = NULL; | |
8419 | - MOD_DEC_USE_COUNT; | |
8420 | - return -EIO; | |
8421 | + if (conf->working_disks != sb->raid_disks) { | |
8422 | + printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); | |
8423 | + start_recovery = 1; | |
8424 | } | |
8425 | ||
8426 | + if (!start_recovery && (sb->state & (1 << MD_SB_CLEAN))) { | |
8427 | + /* | |
8428 | + * we do sanity checks even if the device says | |
8429 | + * it's clean ... | |
8430 | + */ | |
8431 | + if (check_consistency(mddev)) { | |
8432 | + printk(RUNNING_CKRAID); | |
8433 | + sb->state &= ~(1 << MD_SB_CLEAN); | |
8434 | + } | |
8435 | + } | |
8436 | + | |
8437 | + { | |
8438 | + const char * name = "raid1d"; | |
8439 | + | |
8440 | + conf->thread = md_register_thread(raid1d, conf, name); | |
8441 | + if (!conf->thread) { | |
8442 | + printk(THREAD_ERROR, mdidx(mddev)); | |
8443 | + goto out_free_conf; | |
8444 | + } | |
8445 | + } | |
8446 | + | |
8447 | + if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN))) { | |
8448 | + const char * name = "raid1syncd"; | |
8449 | + | |
8450 | + conf->resync_thread = md_register_thread(raid1syncd, conf,name); | |
8451 | + if (!conf->resync_thread) { | |
8452 | + printk(THREAD_ERROR, mdidx(mddev)); | |
8453 | + goto out_free_conf; | |
8454 | + } | |
8455 | + | |
8456 | + printk(START_RESYNC, mdidx(mddev)); | |
8457 | + conf->resync_mirrors = 1; | |
8458 | + md_wakeup_thread(conf->resync_thread); | |
8459 | + } | |
8460 | + | |
8461 | /* | |
8462 | * Regenerate the "device is in sync with the raid set" bit for | |
8463 | * each device. | |
8464 | */ | |
8465 | - for (i = 0; i < sb->nr_disks ; i++) { | |
8466 | - sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE); | |
8467 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
8468 | + mark_disk_nonsync(sb->disks+i); | |
8469 | for (j = 0; j < sb->raid_disks; j++) { | |
8470 | - if (!raid_conf->mirrors[j].operational) | |
8471 | + if (!conf->mirrors[j].operational) | |
8472 | continue; | |
8473 | - if (sb->disks[i].number == raid_conf->mirrors[j].number) | |
8474 | - sb->disks[i].state |= 1 << MD_SYNC_DEVICE; | |
8475 | + if (sb->disks[i].number == conf->mirrors[j].number) | |
8476 | + mark_disk_sync(sb->disks+i); | |
8477 | } | |
8478 | } | |
8479 | - sb->active_disks = raid_conf->working_disks; | |
8480 | + sb->active_disks = conf->working_disks; | |
8481 | ||
8482 | - printk("raid1: raid set %s active with %d out of %d mirrors\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks); | |
8483 | - /* Ok, everything is just fine now */ | |
8484 | - return (0); | |
8485 | + if (start_recovery) | |
8486 | + md_recover_arrays(); | |
8487 | + | |
8488 | + | |
8489 | + printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks); | |
8490 | + /* | |
8491 | + * Ok, everything is just fine now | |
8492 | + */ | |
8493 | + return 0; | |
8494 | + | |
8495 | +out_free_conf: | |
8496 | + kfree(conf); | |
8497 | + mddev->private = NULL; | |
8498 | +out: | |
8499 | + MOD_DEC_USE_COUNT; | |
8500 | + return -EIO; | |
8501 | +} | |
8502 | + | |
8503 | +#undef INVALID_LEVEL | |
8504 | +#undef NO_SB | |
8505 | +#undef ERRORS | |
8506 | +#undef NOT_IN_SYNC | |
8507 | +#undef INCONSISTENT | |
8508 | +#undef ALREADY_RUNNING | |
8509 | +#undef OPERATIONAL | |
8510 | +#undef SPARE | |
8511 | +#undef NONE_OPERATIONAL | |
8512 | +#undef RUNNING_CKRAID | |
8513 | +#undef ARRAY_IS_ACTIVE | |
8514 | + | |
8515 | +static int raid1_stop_resync (mddev_t *mddev) | |
8516 | +{ | |
8517 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
8518 | + | |
8519 | + if (conf->resync_thread) { | |
8520 | + if (conf->resync_mirrors) { | |
8521 | + conf->resync_mirrors = 2; | |
8522 | + md_interrupt_thread(conf->resync_thread); | |
8523 | + printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n"); | |
8524 | + return 1; | |
8525 | + } | |
8526 | + return 0; | |
8527 | + } | |
8528 | + return 0; | |
8529 | +} | |
8530 | + | |
8531 | +static int raid1_restart_resync (mddev_t *mddev) | |
8532 | +{ | |
8533 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
8534 | + | |
8535 | + if (conf->resync_mirrors) { | |
8536 | + if (!conf->resync_thread) { | |
8537 | + MD_BUG(); | |
8538 | + return 0; | |
8539 | + } | |
8540 | + conf->resync_mirrors = 1; | |
8541 | + md_wakeup_thread(conf->resync_thread); | |
8542 | + return 1; | |
8543 | + } | |
8544 | + return 0; | |
8545 | } | |
8546 | ||
8547 | -static int raid1_stop (int minor, struct md_dev *mddev) | |
8548 | +static int raid1_stop (mddev_t *mddev) | |
8549 | { | |
8550 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
8551 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
8552 | ||
8553 | - kfree (raid_conf); | |
8554 | + md_unregister_thread(conf->thread); | |
8555 | + if (conf->resync_thread) | |
8556 | + md_unregister_thread(conf->resync_thread); | |
8557 | + kfree(conf); | |
8558 | mddev->private = NULL; | |
8559 | MOD_DEC_USE_COUNT; | |
8560 | return 0; | |
8561 | } | |
8562 | ||
8563 | -static struct md_personality raid1_personality= | |
8564 | +static mdk_personality_t raid1_personality= | |
8565 | { | |
8566 | "raid1", | |
8567 | raid1_map, | |
8568 | @@ -855,15 +1211,13 @@ | |
8569 | NULL, /* no ioctls */ | |
8570 | 0, | |
8571 | raid1_error, | |
8572 | - raid1_hot_add_disk, | |
8573 | - /* raid1_hot_remove_drive */ NULL, | |
8574 | - raid1_mark_spare | |
8575 | + raid1_diskop, | |
8576 | + raid1_stop_resync, | |
8577 | + raid1_restart_resync | |
8578 | }; | |
8579 | ||
8580 | int raid1_init (void) | |
8581 | { | |
8582 | - if ((raid1_thread = md_register_thread(raid1d, NULL)) == NULL) | |
8583 | - return -EBUSY; | |
8584 | return register_md_personality (RAID1, &raid1_personality); | |
8585 | } | |
8586 | ||
8587 | @@ -875,7 +1229,6 @@ | |
8588 | ||
8589 | void cleanup_module (void) | |
8590 | { | |
8591 | - md_unregister_thread (raid1_thread); | |
8592 | unregister_md_personality (RAID1); | |
8593 | } | |
8594 | #endif | |
8595 | diff -ruN linux.orig/drivers/block/raid5.c linux-2.2.16/drivers/block/raid5.c | |
8596 | --- linux.orig/drivers/block/raid5.c Fri May 8 09:17:13 1998 | |
8597 | +++ linux-2.2.16/drivers/block/raid5.c Fri Jun 9 11:37:45 2000 | |
8598 | @@ -1,4 +1,4 @@ | |
8599 | -/***************************************************************************** | |
8600 | +/* | |
8601 | * raid5.c : Multiple Devices driver for Linux | |
8602 | * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
8603 | * | |
8604 | @@ -14,16 +14,15 @@ | |
8605 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
8606 | */ | |
8607 | ||
8608 | + | |
8609 | #include <linux/module.h> | |
8610 | #include <linux/locks.h> | |
8611 | #include <linux/malloc.h> | |
8612 | -#include <linux/md.h> | |
8613 | -#include <linux/raid5.h> | |
8614 | +#include <linux/raid/raid5.h> | |
8615 | #include <asm/bitops.h> | |
8616 | #include <asm/atomic.h> | |
8617 | -#include <asm/md.h> | |
8618 | ||
8619 | -static struct md_personality raid5_personality; | |
8620 | +static mdk_personality_t raid5_personality; | |
8621 | ||
8622 | /* | |
8623 | * Stripe cache | |
8624 | @@ -33,7 +32,7 @@ | |
8625 | #define HASH_PAGES_ORDER 0 | |
8626 | #define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) | |
8627 | #define HASH_MASK (NR_HASH - 1) | |
8628 | -#define stripe_hash(raid_conf, sect, size) ((raid_conf)->stripe_hashtbl[((sect) / (size >> 9)) & HASH_MASK]) | |
8629 | +#define stripe_hash(conf, sect, size) ((conf)->stripe_hashtbl[((sect) / (size >> 9)) & HASH_MASK]) | |
8630 | ||
8631 | /* | |
8632 | * The following can be used to debug the driver | |
8633 | @@ -46,6 +45,8 @@ | |
8634 | #define PRINTK(x) do { ; } while (0) | |
8635 | #endif | |
8636 | ||
8637 | +static void print_raid5_conf (raid5_conf_t *conf); | |
8638 | + | |
8639 | static inline int stripe_locked(struct stripe_head *sh) | |
8640 | { | |
8641 | return test_bit(STRIPE_LOCKED, &sh->state); | |
8642 | @@ -61,32 +62,32 @@ | |
8643 | */ | |
8644 | static inline void lock_stripe(struct stripe_head *sh) | |
8645 | { | |
8646 | - struct raid5_data *raid_conf = sh->raid_conf; | |
8647 | - if (!test_and_set_bit(STRIPE_LOCKED, &sh->state)) { | |
8648 | + raid5_conf_t *conf = sh->raid_conf; | |
8649 | + if (!md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) { | |
8650 | PRINTK(("locking stripe %lu\n", sh->sector)); | |
8651 | - raid_conf->nr_locked_stripes++; | |
8652 | + conf->nr_locked_stripes++; | |
8653 | } | |
8654 | } | |
8655 | ||
8656 | static inline void unlock_stripe(struct stripe_head *sh) | |
8657 | { | |
8658 | - struct raid5_data *raid_conf = sh->raid_conf; | |
8659 | - if (test_and_clear_bit(STRIPE_LOCKED, &sh->state)) { | |
8660 | + raid5_conf_t *conf = sh->raid_conf; | |
8661 | + if (md_test_and_clear_bit(STRIPE_LOCKED, &sh->state)) { | |
8662 | PRINTK(("unlocking stripe %lu\n", sh->sector)); | |
8663 | - raid_conf->nr_locked_stripes--; | |
8664 | + conf->nr_locked_stripes--; | |
8665 | wake_up(&sh->wait); | |
8666 | } | |
8667 | } | |
8668 | ||
8669 | static inline void finish_stripe(struct stripe_head *sh) | |
8670 | { | |
8671 | - struct raid5_data *raid_conf = sh->raid_conf; | |
8672 | + raid5_conf_t *conf = sh->raid_conf; | |
8673 | unlock_stripe(sh); | |
8674 | sh->cmd = STRIPE_NONE; | |
8675 | sh->phase = PHASE_COMPLETE; | |
8676 | - raid_conf->nr_pending_stripes--; | |
8677 | - raid_conf->nr_cached_stripes++; | |
8678 | - wake_up(&raid_conf->wait_for_stripe); | |
8679 | + conf->nr_pending_stripes--; | |
8680 | + conf->nr_cached_stripes++; | |
8681 | + wake_up(&conf->wait_for_stripe); | |
8682 | } | |
8683 | ||
8684 | void __wait_on_stripe(struct stripe_head *sh) | |
8685 | @@ -114,7 +115,7 @@ | |
8686 | __wait_on_stripe(sh); | |
8687 | } | |
8688 | ||
8689 | -static inline void remove_hash(struct raid5_data *raid_conf, struct stripe_head *sh) | |
8690 | +static inline void remove_hash(raid5_conf_t *conf, struct stripe_head *sh) | |
8691 | { | |
8692 | PRINTK(("remove_hash(), stripe %lu\n", sh->sector)); | |
8693 | ||
8694 | @@ -123,21 +124,22 @@ | |
8695 | sh->hash_next->hash_pprev = sh->hash_pprev; | |
8696 | *sh->hash_pprev = sh->hash_next; | |
8697 | sh->hash_pprev = NULL; | |
8698 | - raid_conf->nr_hashed_stripes--; | |
8699 | + conf->nr_hashed_stripes--; | |
8700 | } | |
8701 | } | |
8702 | ||
8703 | -static inline void insert_hash(struct raid5_data *raid_conf, struct stripe_head *sh) | |
8704 | +static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) | |
8705 | { | |
8706 | - struct stripe_head **shp = &stripe_hash(raid_conf, sh->sector, sh->size); | |
8707 | + struct stripe_head **shp = &stripe_hash(conf, sh->sector, sh->size); | |
8708 | ||
8709 | - PRINTK(("insert_hash(), stripe %lu, nr_hashed_stripes %d\n", sh->sector, raid_conf->nr_hashed_stripes)); | |
8710 | + PRINTK(("insert_hash(), stripe %lu, nr_hashed_stripes %d\n", | |
8711 | + sh->sector, conf->nr_hashed_stripes)); | |
8712 | ||
8713 | if ((sh->hash_next = *shp) != NULL) | |
8714 | (*shp)->hash_pprev = &sh->hash_next; | |
8715 | *shp = sh; | |
8716 | sh->hash_pprev = shp; | |
8717 | - raid_conf->nr_hashed_stripes++; | |
8718 | + conf->nr_hashed_stripes++; | |
8719 | } | |
8720 | ||
8721 | static struct buffer_head *get_free_buffer(struct stripe_head *sh, int b_size) | |
8722 | @@ -145,13 +147,15 @@ | |
8723 | struct buffer_head *bh; | |
8724 | unsigned long flags; | |
8725 | ||
8726 | - save_flags(flags); | |
8727 | - cli(); | |
8728 | - if ((bh = sh->buffer_pool) == NULL) | |
8729 | - return NULL; | |
8730 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
8731 | + bh = sh->buffer_pool; | |
8732 | + if (!bh) | |
8733 | + goto out_unlock; | |
8734 | sh->buffer_pool = bh->b_next; | |
8735 | bh->b_size = b_size; | |
8736 | - restore_flags(flags); | |
8737 | +out_unlock: | |
8738 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
8739 | + | |
8740 | return bh; | |
8741 | } | |
8742 | ||
8743 | @@ -160,12 +164,14 @@ | |
8744 | struct buffer_head *bh; | |
8745 | unsigned long flags; | |
8746 | ||
8747 | - save_flags(flags); | |
8748 | - cli(); | |
8749 | - if ((bh = sh->bh_pool) == NULL) | |
8750 | - return NULL; | |
8751 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
8752 | + bh = sh->bh_pool; | |
8753 | + if (!bh) | |
8754 | + goto out_unlock; | |
8755 | sh->bh_pool = bh->b_next; | |
8756 | - restore_flags(flags); | |
8757 | +out_unlock: | |
8758 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
8759 | + | |
8760 | return bh; | |
8761 | } | |
8762 | ||
8763 | @@ -173,54 +179,52 @@ | |
8764 | { | |
8765 | unsigned long flags; | |
8766 | ||
8767 | - save_flags(flags); | |
8768 | - cli(); | |
8769 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
8770 | bh->b_next = sh->buffer_pool; | |
8771 | sh->buffer_pool = bh; | |
8772 | - restore_flags(flags); | |
8773 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
8774 | } | |
8775 | ||
8776 | static void put_free_bh(struct stripe_head *sh, struct buffer_head *bh) | |
8777 | { | |
8778 | unsigned long flags; | |
8779 | ||
8780 | - save_flags(flags); | |
8781 | - cli(); | |
8782 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
8783 | bh->b_next = sh->bh_pool; | |
8784 | sh->bh_pool = bh; | |
8785 | - restore_flags(flags); | |
8786 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
8787 | } | |
8788 | ||
8789 | -static struct stripe_head *get_free_stripe(struct raid5_data *raid_conf) | |
8790 | +static struct stripe_head *get_free_stripe(raid5_conf_t *conf) | |
8791 | { | |
8792 | struct stripe_head *sh; | |
8793 | unsigned long flags; | |
8794 | ||
8795 | save_flags(flags); | |
8796 | cli(); | |
8797 | - if ((sh = raid_conf->free_sh_list) == NULL) { | |
8798 | + if ((sh = conf->free_sh_list) == NULL) { | |
8799 | restore_flags(flags); | |
8800 | return NULL; | |
8801 | } | |
8802 | - raid_conf->free_sh_list = sh->free_next; | |
8803 | - raid_conf->nr_free_sh--; | |
8804 | - if (!raid_conf->nr_free_sh && raid_conf->free_sh_list) | |
8805 | + conf->free_sh_list = sh->free_next; | |
8806 | + conf->nr_free_sh--; | |
8807 | + if (!conf->nr_free_sh && conf->free_sh_list) | |
8808 | printk ("raid5: bug: free_sh_list != NULL, nr_free_sh == 0\n"); | |
8809 | restore_flags(flags); | |
8810 | - if (sh->hash_pprev || sh->nr_pending || sh->count) | |
8811 | + if (sh->hash_pprev || md_atomic_read(&sh->nr_pending) || sh->count) | |
8812 | printk("get_free_stripe(): bug\n"); | |
8813 | return sh; | |
8814 | } | |
8815 | ||
8816 | -static void put_free_stripe(struct raid5_data *raid_conf, struct stripe_head *sh) | |
8817 | +static void put_free_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |
8818 | { | |
8819 | unsigned long flags; | |
8820 | ||
8821 | save_flags(flags); | |
8822 | cli(); | |
8823 | - sh->free_next = raid_conf->free_sh_list; | |
8824 | - raid_conf->free_sh_list = sh; | |
8825 | - raid_conf->nr_free_sh++; | |
8826 | + sh->free_next = conf->free_sh_list; | |
8827 | + conf->free_sh_list = sh; | |
8828 | + conf->nr_free_sh++; | |
8829 | restore_flags(flags); | |
8830 | } | |
8831 | ||
8832 | @@ -324,8 +328,8 @@ | |
8833 | ||
8834 | static void kfree_stripe(struct stripe_head *sh) | |
8835 | { | |
8836 | - struct raid5_data *raid_conf = sh->raid_conf; | |
8837 | - int disks = raid_conf->raid_disks, j; | |
8838 | + raid5_conf_t *conf = sh->raid_conf; | |
8839 | + int disks = conf->raid_disks, j; | |
8840 | ||
8841 | PRINTK(("kfree_stripe called, stripe %lu\n", sh->sector)); | |
8842 | if (sh->phase != PHASE_COMPLETE || stripe_locked(sh) || sh->count) { | |
8843 | @@ -338,19 +342,19 @@ | |
8844 | if (sh->bh_new[j] || sh->bh_copy[j]) | |
8845 | printk("raid5: bug: sector %lu, new %p, copy %p\n", sh->sector, sh->bh_new[j], sh->bh_copy[j]); | |
8846 | } | |
8847 | - remove_hash(raid_conf, sh); | |
8848 | - put_free_stripe(raid_conf, sh); | |
8849 | + remove_hash(conf, sh); | |
8850 | + put_free_stripe(conf, sh); | |
8851 | } | |
8852 | ||
8853 | -static int shrink_stripe_cache(struct raid5_data *raid_conf, int nr) | |
8854 | +static int shrink_stripe_cache(raid5_conf_t *conf, int nr) | |
8855 | { | |
8856 | struct stripe_head *sh; | |
8857 | int i, count = 0; | |
8858 | ||
8859 | - PRINTK(("shrink_stripe_cache called, %d/%d, clock %d\n", nr, raid_conf->nr_hashed_stripes, raid_conf->clock)); | |
8860 | + PRINTK(("shrink_stripe_cache called, %d/%d, clock %d\n", nr, conf->nr_hashed_stripes, conf->clock)); | |
8861 | for (i = 0; i < NR_HASH; i++) { | |
8862 | repeat: | |
8863 | - sh = raid_conf->stripe_hashtbl[(i + raid_conf->clock) & HASH_MASK]; | |
8864 | + sh = conf->stripe_hashtbl[(i + conf->clock) & HASH_MASK]; | |
8865 | for (; sh; sh = sh->hash_next) { | |
8866 | if (sh->phase != PHASE_COMPLETE) | |
8867 | continue; | |
8868 | @@ -360,30 +364,30 @@ | |
8869 | continue; | |
8870 | kfree_stripe(sh); | |
8871 | if (++count == nr) { | |
8872 | - PRINTK(("shrink completed, nr_hashed_stripes %d\n", raid_conf->nr_hashed_stripes)); | |
8873 | - raid_conf->clock = (i + raid_conf->clock) & HASH_MASK; | |
8874 | + PRINTK(("shrink completed, nr_hashed_stripes %d\n", conf->nr_hashed_stripes)); | |
8875 | + conf->clock = (i + conf->clock) & HASH_MASK; | |
8876 | return nr; | |
8877 | } | |
8878 | goto repeat; | |
8879 | } | |
8880 | } | |
8881 | - PRINTK(("shrink completed, nr_hashed_stripes %d\n", raid_conf->nr_hashed_stripes)); | |
8882 | + PRINTK(("shrink completed, nr_hashed_stripes %d\n", conf->nr_hashed_stripes)); | |
8883 | return count; | |
8884 | } | |
8885 | ||
8886 | -static struct stripe_head *find_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
8887 | +static struct stripe_head *find_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
8888 | { | |
8889 | struct stripe_head *sh; | |
8890 | ||
8891 | - if (raid_conf->buffer_size != size) { | |
8892 | - PRINTK(("switching size, %d --> %d\n", raid_conf->buffer_size, size)); | |
8893 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes); | |
8894 | - raid_conf->buffer_size = size; | |
8895 | + if (conf->buffer_size != size) { | |
8896 | + PRINTK(("switching size, %d --> %d\n", conf->buffer_size, size)); | |
8897 | + shrink_stripe_cache(conf, conf->max_nr_stripes); | |
8898 | + conf->buffer_size = size; | |
8899 | } | |
8900 | ||
8901 | PRINTK(("find_stripe, sector %lu\n", sector)); | |
8902 | - for (sh = stripe_hash(raid_conf, sector, size); sh; sh = sh->hash_next) | |
8903 | - if (sh->sector == sector && sh->raid_conf == raid_conf) { | |
8904 | + for (sh = stripe_hash(conf, sector, size); sh; sh = sh->hash_next) | |
8905 | + if (sh->sector == sector && sh->raid_conf == conf) { | |
8906 | if (sh->size == size) { | |
8907 | PRINTK(("found stripe %lu\n", sector)); | |
8908 | return sh; | |
8909 | @@ -397,7 +401,7 @@ | |
8910 | return NULL; | |
8911 | } | |
8912 | ||
8913 | -static int grow_stripes(struct raid5_data *raid_conf, int num, int priority) | |
8914 | +static int grow_stripes(raid5_conf_t *conf, int num, int priority) | |
8915 | { | |
8916 | struct stripe_head *sh; | |
8917 | ||
8918 | @@ -405,62 +409,64 @@ | |
8919 | if ((sh = kmalloc(sizeof(struct stripe_head), priority)) == NULL) | |
8920 | return 1; | |
8921 | memset(sh, 0, sizeof(*sh)); | |
8922 | - if (grow_buffers(sh, 2 * raid_conf->raid_disks, PAGE_SIZE, priority)) { | |
8923 | - shrink_buffers(sh, 2 * raid_conf->raid_disks); | |
8924 | + sh->stripe_lock = MD_SPIN_LOCK_UNLOCKED; | |
8925 | + | |
8926 | + if (grow_buffers(sh, 2 * conf->raid_disks, PAGE_SIZE, priority)) { | |
8927 | + shrink_buffers(sh, 2 * conf->raid_disks); | |
8928 | kfree(sh); | |
8929 | return 1; | |
8930 | } | |
8931 | - if (grow_bh(sh, raid_conf->raid_disks, priority)) { | |
8932 | - shrink_buffers(sh, 2 * raid_conf->raid_disks); | |
8933 | - shrink_bh(sh, raid_conf->raid_disks); | |
8934 | + if (grow_bh(sh, conf->raid_disks, priority)) { | |
8935 | + shrink_buffers(sh, 2 * conf->raid_disks); | |
8936 | + shrink_bh(sh, conf->raid_disks); | |
8937 | kfree(sh); | |
8938 | return 1; | |
8939 | } | |
8940 | - put_free_stripe(raid_conf, sh); | |
8941 | - raid_conf->nr_stripes++; | |
8942 | + put_free_stripe(conf, sh); | |
8943 | + conf->nr_stripes++; | |
8944 | } | |
8945 | return 0; | |
8946 | } | |
8947 | ||
8948 | -static void shrink_stripes(struct raid5_data *raid_conf, int num) | |
8949 | +static void shrink_stripes(raid5_conf_t *conf, int num) | |
8950 | { | |
8951 | struct stripe_head *sh; | |
8952 | ||
8953 | while (num--) { | |
8954 | - sh = get_free_stripe(raid_conf); | |
8955 | + sh = get_free_stripe(conf); | |
8956 | if (!sh) | |
8957 | break; | |
8958 | - shrink_buffers(sh, raid_conf->raid_disks * 2); | |
8959 | - shrink_bh(sh, raid_conf->raid_disks); | |
8960 | + shrink_buffers(sh, conf->raid_disks * 2); | |
8961 | + shrink_bh(sh, conf->raid_disks); | |
8962 | kfree(sh); | |
8963 | - raid_conf->nr_stripes--; | |
8964 | + conf->nr_stripes--; | |
8965 | } | |
8966 | } | |
8967 | ||
8968 | -static struct stripe_head *kmalloc_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
8969 | +static struct stripe_head *kmalloc_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
8970 | { | |
8971 | struct stripe_head *sh = NULL, *tmp; | |
8972 | struct buffer_head *buffer_pool, *bh_pool; | |
8973 | ||
8974 | PRINTK(("kmalloc_stripe called\n")); | |
8975 | ||
8976 | - while ((sh = get_free_stripe(raid_conf)) == NULL) { | |
8977 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes / 8); | |
8978 | - if ((sh = get_free_stripe(raid_conf)) != NULL) | |
8979 | + while ((sh = get_free_stripe(conf)) == NULL) { | |
8980 | + shrink_stripe_cache(conf, conf->max_nr_stripes / 8); | |
8981 | + if ((sh = get_free_stripe(conf)) != NULL) | |
8982 | break; | |
8983 | - if (!raid_conf->nr_pending_stripes) | |
8984 | + if (!conf->nr_pending_stripes) | |
8985 | printk("raid5: bug: nr_free_sh == 0, nr_pending_stripes == 0\n"); | |
8986 | - md_wakeup_thread(raid_conf->thread); | |
8987 | + md_wakeup_thread(conf->thread); | |
8988 | PRINTK(("waiting for some stripes to complete\n")); | |
8989 | - sleep_on(&raid_conf->wait_for_stripe); | |
8990 | + sleep_on(&conf->wait_for_stripe); | |
8991 | } | |
8992 | ||
8993 | /* | |
8994 | * The above might have slept, so perhaps another process | |
8995 | * already created the stripe for us.. | |
8996 | */ | |
8997 | - if ((tmp = find_stripe(raid_conf, sector, size)) != NULL) { | |
8998 | - put_free_stripe(raid_conf, sh); | |
8999 | + if ((tmp = find_stripe(conf, sector, size)) != NULL) { | |
9000 | + put_free_stripe(conf, sh); | |
9001 | wait_on_stripe(tmp); | |
9002 | return tmp; | |
9003 | } | |
9004 | @@ -472,25 +478,25 @@ | |
9005 | sh->bh_pool = bh_pool; | |
9006 | sh->phase = PHASE_COMPLETE; | |
9007 | sh->cmd = STRIPE_NONE; | |
9008 | - sh->raid_conf = raid_conf; | |
9009 | + sh->raid_conf = conf; | |
9010 | sh->sector = sector; | |
9011 | sh->size = size; | |
9012 | - raid_conf->nr_cached_stripes++; | |
9013 | - insert_hash(raid_conf, sh); | |
9014 | + conf->nr_cached_stripes++; | |
9015 | + insert_hash(conf, sh); | |
9016 | } else printk("raid5: bug: kmalloc_stripe() == NULL\n"); | |
9017 | return sh; | |
9018 | } | |
9019 | ||
9020 | -static struct stripe_head *get_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
9021 | +static struct stripe_head *get_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
9022 | { | |
9023 | struct stripe_head *sh; | |
9024 | ||
9025 | PRINTK(("get_stripe, sector %lu\n", sector)); | |
9026 | - sh = find_stripe(raid_conf, sector, size); | |
9027 | + sh = find_stripe(conf, sector, size); | |
9028 | if (sh) | |
9029 | wait_on_stripe(sh); | |
9030 | else | |
9031 | - sh = kmalloc_stripe(raid_conf, sector, size); | |
9032 | + sh = kmalloc_stripe(conf, sector, size); | |
9033 | return sh; | |
9034 | } | |
9035 | ||
9036 | @@ -523,7 +529,7 @@ | |
9037 | bh->b_end_io(bh, uptodate); | |
9038 | if (!uptodate) | |
9039 | printk(KERN_ALERT "raid5: %s: unrecoverable I/O error for " | |
9040 | - "block %lu\n", kdevname(bh->b_dev), bh->b_blocknr); | |
9041 | + "block %lu\n", partition_name(bh->b_dev), bh->b_blocknr); | |
9042 | } | |
9043 | ||
9044 | static inline void raid5_mark_buffer_uptodate (struct buffer_head *bh, int uptodate) | |
9045 | @@ -537,36 +543,35 @@ | |
9046 | static void raid5_end_request (struct buffer_head * bh, int uptodate) | |
9047 | { | |
9048 | struct stripe_head *sh = bh->b_dev_id; | |
9049 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9050 | - int disks = raid_conf->raid_disks, i; | |
9051 | + raid5_conf_t *conf = sh->raid_conf; | |
9052 | + int disks = conf->raid_disks, i; | |
9053 | unsigned long flags; | |
9054 | ||
9055 | PRINTK(("end_request %lu, nr_pending %d\n", sh->sector, sh->nr_pending)); | |
9056 | - save_flags(flags); | |
9057 | - cli(); | |
9058 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
9059 | raid5_mark_buffer_uptodate(bh, uptodate); | |
9060 | - --sh->nr_pending; | |
9061 | - if (!sh->nr_pending) { | |
9062 | - md_wakeup_thread(raid_conf->thread); | |
9063 | - atomic_inc(&raid_conf->nr_handle); | |
9064 | + if (atomic_dec_and_test(&sh->nr_pending)) { | |
9065 | + md_wakeup_thread(conf->thread); | |
9066 | + atomic_inc(&conf->nr_handle); | |
9067 | } | |
9068 | - if (!uptodate) | |
9069 | + if (!uptodate) { | |
9070 | md_error(bh->b_dev, bh->b_rdev); | |
9071 | - if (raid_conf->failed_disks) { | |
9072 | + } | |
9073 | + if (conf->failed_disks) { | |
9074 | for (i = 0; i < disks; i++) { | |
9075 | - if (raid_conf->disks[i].operational) | |
9076 | + if (conf->disks[i].operational) | |
9077 | continue; | |
9078 | if (bh != sh->bh_old[i] && bh != sh->bh_req[i] && bh != sh->bh_copy[i]) | |
9079 | continue; | |
9080 | - if (bh->b_rdev != raid_conf->disks[i].dev) | |
9081 | + if (bh->b_rdev != conf->disks[i].dev) | |
9082 | continue; | |
9083 | set_bit(STRIPE_ERROR, &sh->state); | |
9084 | } | |
9085 | } | |
9086 | - restore_flags(flags); | |
9087 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
9088 | } | |
9089 | ||
9090 | -static int raid5_map (struct md_dev *mddev, kdev_t *rdev, | |
9091 | +static int raid5_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
9092 | unsigned long *rsector, unsigned long size) | |
9093 | { | |
9094 | /* No complex mapping used: the core of the work is done in the | |
9095 | @@ -577,11 +582,10 @@ | |
9096 | ||
9097 | static void raid5_build_block (struct stripe_head *sh, struct buffer_head *bh, int i) | |
9098 | { | |
9099 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9100 | - struct md_dev *mddev = raid_conf->mddev; | |
9101 | - int minor = (int) (mddev - md_dev); | |
9102 | + raid5_conf_t *conf = sh->raid_conf; | |
9103 | + mddev_t *mddev = conf->mddev; | |
9104 | char *b_data; | |
9105 | - kdev_t dev = MKDEV(MD_MAJOR, minor); | |
9106 | + kdev_t dev = mddev_to_kdev(mddev); | |
9107 | int block = sh->sector / (sh->size >> 9); | |
9108 | ||
9109 | b_data = ((volatile struct buffer_head *) bh)->b_data; | |
9110 | @@ -589,7 +593,7 @@ | |
9111 | init_buffer(bh, dev, block, raid5_end_request, sh); | |
9112 | ((volatile struct buffer_head *) bh)->b_data = b_data; | |
9113 | ||
9114 | - bh->b_rdev = raid_conf->disks[i].dev; | |
9115 | + bh->b_rdev = conf->disks[i].dev; | |
9116 | bh->b_rsector = sh->sector; | |
9117 | ||
9118 | bh->b_state = (1 << BH_Req); | |
9119 | @@ -597,33 +601,62 @@ | |
9120 | bh->b_list = BUF_LOCKED; | |
9121 | } | |
9122 | ||
9123 | -static int raid5_error (struct md_dev *mddev, kdev_t dev) | |
9124 | +static int raid5_error (mddev_t *mddev, kdev_t dev) | |
9125 | { | |
9126 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
9127 | - md_superblock_t *sb = mddev->sb; | |
9128 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
9129 | + mdp_super_t *sb = mddev->sb; | |
9130 | struct disk_info *disk; | |
9131 | int i; | |
9132 | ||
9133 | PRINTK(("raid5_error called\n")); | |
9134 | - raid_conf->resync_parity = 0; | |
9135 | - for (i = 0, disk = raid_conf->disks; i < raid_conf->raid_disks; i++, disk++) | |
9136 | + conf->resync_parity = 0; | |
9137 | + for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) { | |
9138 | if (disk->dev == dev && disk->operational) { | |
9139 | disk->operational = 0; | |
9140 | - sb->disks[disk->number].state |= (1 << MD_FAULTY_DEVICE); | |
9141 | - sb->disks[disk->number].state &= ~(1 << MD_SYNC_DEVICE); | |
9142 | - sb->disks[disk->number].state &= ~(1 << MD_ACTIVE_DEVICE); | |
9143 | + mark_disk_faulty(sb->disks+disk->number); | |
9144 | + mark_disk_nonsync(sb->disks+disk->number); | |
9145 | + mark_disk_inactive(sb->disks+disk->number); | |
9146 | sb->active_disks--; | |
9147 | sb->working_disks--; | |
9148 | sb->failed_disks++; | |
9149 | mddev->sb_dirty = 1; | |
9150 | - raid_conf->working_disks--; | |
9151 | - raid_conf->failed_disks++; | |
9152 | - md_wakeup_thread(raid_conf->thread); | |
9153 | + conf->working_disks--; | |
9154 | + conf->failed_disks++; | |
9155 | + md_wakeup_thread(conf->thread); | |
9156 | printk (KERN_ALERT | |
9157 | - "RAID5: Disk failure on %s, disabling device." | |
9158 | - "Operation continuing on %d devices\n", | |
9159 | - kdevname (dev), raid_conf->working_disks); | |
9160 | + "raid5: Disk failure on %s, disabling device." | |
9161 | + " Operation continuing on %d devices\n", | |
9162 | + partition_name (dev), conf->working_disks); | |
9163 | + return -EIO; | |
9164 | } | |
9165 | + } | |
9166 | + /* | |
9167 | + * handle errors in spares (during reconstruction) | |
9168 | + */ | |
9169 | + if (conf->spare) { | |
9170 | + disk = conf->spare; | |
9171 | + if (disk->dev == dev) { | |
9172 | + printk (KERN_ALERT | |
9173 | + "raid5: Disk failure on spare %s\n", | |
9174 | + partition_name (dev)); | |
9175 | + if (!conf->spare->operational) { | |
9176 | + MD_BUG(); | |
9177 | + return -EIO; | |
9178 | + } | |
9179 | + disk->operational = 0; | |
9180 | + disk->write_only = 0; | |
9181 | + conf->spare = NULL; | |
9182 | + mark_disk_faulty(sb->disks+disk->number); | |
9183 | + mark_disk_nonsync(sb->disks+disk->number); | |
9184 | + mark_disk_inactive(sb->disks+disk->number); | |
9185 | + sb->spare_disks--; | |
9186 | + sb->working_disks--; | |
9187 | + sb->failed_disks++; | |
9188 | + | |
9189 | + return -EIO; | |
9190 | + } | |
9191 | + } | |
9192 | + MD_BUG(); | |
9193 | return 0; | |
9194 | } | |
9195 | ||
9196 | @@ -634,12 +667,12 @@ | |
9197 | static inline unsigned long | |
9198 | raid5_compute_sector (int r_sector, unsigned int raid_disks, unsigned int data_disks, | |
9199 | unsigned int * dd_idx, unsigned int * pd_idx, | |
9200 | - struct raid5_data *raid_conf) | |
9201 | + raid5_conf_t *conf) | |
9202 | { | |
9203 | unsigned int stripe; | |
9204 | int chunk_number, chunk_offset; | |
9205 | unsigned long new_sector; | |
9206 | - int sectors_per_chunk = raid_conf->chunk_size >> 9; | |
9207 | + int sectors_per_chunk = conf->chunk_size >> 9; | |
9208 | ||
9209 | /* First compute the information on this sector */ | |
9210 | ||
9211 | @@ -662,9 +695,9 @@ | |
9212 | /* | |
9213 | * Select the parity disk based on the user selected algorithm. | |
9214 | */ | |
9215 | - if (raid_conf->level == 4) | |
9216 | + if (conf->level == 4) | |
9217 | *pd_idx = data_disks; | |
9218 | - else switch (raid_conf->algorithm) { | |
9219 | + else switch (conf->algorithm) { | |
9220 | case ALGORITHM_LEFT_ASYMMETRIC: | |
9221 | *pd_idx = data_disks - stripe % raid_disks; | |
9222 | if (*dd_idx >= *pd_idx) | |
9223 | @@ -684,7 +717,7 @@ | |
9224 | *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; | |
9225 | break; | |
9226 | default: | |
9227 | - printk ("raid5: unsupported algorithm %d\n", raid_conf->algorithm); | |
9228 | + printk ("raid5: unsupported algorithm %d\n", conf->algorithm); | |
9229 | } | |
9230 | ||
9231 | /* | |
9232 | @@ -705,16 +738,16 @@ | |
9233 | ||
9234 | static unsigned long compute_blocknr(struct stripe_head *sh, int i) | |
9235 | { | |
9236 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9237 | - int raid_disks = raid_conf->raid_disks, data_disks = raid_disks - 1; | |
9238 | + raid5_conf_t *conf = sh->raid_conf; | |
9239 | + int raid_disks = conf->raid_disks, data_disks = raid_disks - 1; | |
9240 | unsigned long new_sector = sh->sector, check; | |
9241 | - int sectors_per_chunk = raid_conf->chunk_size >> 9; | |
9242 | + int sectors_per_chunk = conf->chunk_size >> 9; | |
9243 | unsigned long stripe = new_sector / sectors_per_chunk; | |
9244 | int chunk_offset = new_sector % sectors_per_chunk; | |
9245 | int chunk_number, dummy1, dummy2, dd_idx = i; | |
9246 | unsigned long r_sector, blocknr; | |
9247 | ||
9248 | - switch (raid_conf->algorithm) { | |
9249 | + switch (conf->algorithm) { | |
9250 | case ALGORITHM_LEFT_ASYMMETRIC: | |
9251 | case ALGORITHM_RIGHT_ASYMMETRIC: | |
9252 | if (i > sh->pd_idx) | |
9253 | @@ -727,14 +760,14 @@ | |
9254 | i -= (sh->pd_idx + 1); | |
9255 | break; | |
9256 | default: | |
9257 | - printk ("raid5: unsupported algorithm %d\n", raid_conf->algorithm); | |
9258 | + printk ("raid5: unsupported algorithm %d\n", conf->algorithm); | |
9259 | } | |
9260 | ||
9261 | chunk_number = stripe * data_disks + i; | |
9262 | r_sector = chunk_number * sectors_per_chunk + chunk_offset; | |
9263 | blocknr = r_sector / (sh->size >> 9); | |
9264 | ||
9265 | - check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, raid_conf); | |
9266 | + check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); | |
9267 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { | |
9268 | printk("compute_blocknr: map not correct\n"); | |
9269 | return 0; | |
9270 | @@ -742,36 +775,11 @@ | |
9271 | return blocknr; | |
9272 | } | |
9273 | ||
9274 | -#ifdef HAVE_ARCH_XORBLOCK | |
9275 | -static void xor_block(struct buffer_head *dest, struct buffer_head *source) | |
9276 | -{ | |
9277 | - __xor_block((char *) dest->b_data, (char *) source->b_data, dest->b_size); | |
9278 | -} | |
9279 | -#else | |
9280 | -static void xor_block(struct buffer_head *dest, struct buffer_head *source) | |
9281 | -{ | |
9282 | - long lines = dest->b_size / (sizeof (long)) / 8, i; | |
9283 | - long *destp = (long *) dest->b_data, *sourcep = (long *) source->b_data; | |
9284 | - | |
9285 | - for (i = lines; i > 0; i--) { | |
9286 | - *(destp + 0) ^= *(sourcep + 0); | |
9287 | - *(destp + 1) ^= *(sourcep + 1); | |
9288 | - *(destp + 2) ^= *(sourcep + 2); | |
9289 | - *(destp + 3) ^= *(sourcep + 3); | |
9290 | - *(destp + 4) ^= *(sourcep + 4); | |
9291 | - *(destp + 5) ^= *(sourcep + 5); | |
9292 | - *(destp + 6) ^= *(sourcep + 6); | |
9293 | - *(destp + 7) ^= *(sourcep + 7); | |
9294 | - destp += 8; | |
9295 | - sourcep += 8; | |
9296 | - } | |
9297 | -} | |
9298 | -#endif | |
9299 | - | |
9300 | static void compute_block(struct stripe_head *sh, int dd_idx) | |
9301 | { | |
9302 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9303 | - int i, disks = raid_conf->raid_disks; | |
9304 | + raid5_conf_t *conf = sh->raid_conf; | |
9305 | + int i, count, disks = conf->raid_disks; | |
9306 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
9307 | ||
9308 | PRINTK(("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx)); | |
9309 | ||
9310 | @@ -780,69 +788,100 @@ | |
9311 | raid5_build_block(sh, sh->bh_old[dd_idx], dd_idx); | |
9312 | ||
9313 | memset(sh->bh_old[dd_idx]->b_data, 0, sh->size); | |
9314 | + bh_ptr[0] = sh->bh_old[dd_idx]; | |
9315 | + count = 1; | |
9316 | for (i = 0; i < disks; i++) { | |
9317 | if (i == dd_idx) | |
9318 | continue; | |
9319 | if (sh->bh_old[i]) { | |
9320 | - xor_block(sh->bh_old[dd_idx], sh->bh_old[i]); | |
9321 | - continue; | |
9322 | - } else | |
9323 | + bh_ptr[count++] = sh->bh_old[i]; | |
9324 | + } else { | |
9325 | printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i); | |
9326 | + } | |
9327 | + if (count == MAX_XOR_BLOCKS) { | |
9328 | + xor_block(count, &bh_ptr[0]); | |
9329 | + count = 1; | |
9330 | + } | |
9331 | + } | |
9332 | + if(count != 1) { | |
9333 | + xor_block(count, &bh_ptr[0]); | |
9334 | } | |
9335 | raid5_mark_buffer_uptodate(sh->bh_old[dd_idx], 1); | |
9336 | } | |
9337 | ||
9338 | static void compute_parity(struct stripe_head *sh, int method) | |
9339 | { | |
9340 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9341 | - int i, pd_idx = sh->pd_idx, disks = raid_conf->raid_disks; | |
9342 | + raid5_conf_t *conf = sh->raid_conf; | |
9343 | + int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, lowprio, count; | |
9344 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
9345 | ||
9346 | PRINTK(("compute_parity, stripe %lu, method %d\n", sh->sector, method)); | |
9347 | + lowprio = 1; | |
9348 | for (i = 0; i < disks; i++) { | |
9349 | if (i == pd_idx || !sh->bh_new[i]) | |
9350 | continue; | |
9351 | if (!sh->bh_copy[i]) | |
9352 | sh->bh_copy[i] = raid5_kmalloc_buffer(sh, sh->size); | |
9353 | raid5_build_block(sh, sh->bh_copy[i], i); | |
9354 | + if (!buffer_lowprio(sh->bh_new[i])) | |
9355 | + lowprio = 0; | |
9356 | + else | |
9357 | + mark_buffer_lowprio(sh->bh_copy[i]); | |
9358 | mark_buffer_clean(sh->bh_new[i]); | |
9359 | memcpy(sh->bh_copy[i]->b_data, sh->bh_new[i]->b_data, sh->size); | |
9360 | } | |
9361 | if (sh->bh_copy[pd_idx] == NULL) | |
9362 | sh->bh_copy[pd_idx] = raid5_kmalloc_buffer(sh, sh->size); | |
9363 | raid5_build_block(sh, sh->bh_copy[pd_idx], sh->pd_idx); | |
9364 | + if (lowprio) | |
9365 | + mark_buffer_lowprio(sh->bh_copy[pd_idx]); | |
9366 | ||
9367 | if (method == RECONSTRUCT_WRITE) { | |
9368 | memset(sh->bh_copy[pd_idx]->b_data, 0, sh->size); | |
9369 | + bh_ptr[0] = sh->bh_copy[pd_idx]; | |
9370 | + count = 1; | |
9371 | for (i = 0; i < disks; i++) { | |
9372 | if (i == sh->pd_idx) | |
9373 | continue; | |
9374 | if (sh->bh_new[i]) { | |
9375 | - xor_block(sh->bh_copy[pd_idx], sh->bh_copy[i]); | |
9376 | - continue; | |
9377 | + bh_ptr[count++] = sh->bh_copy[i]; | |
9378 | + } else if (sh->bh_old[i]) { | |
9379 | + bh_ptr[count++] = sh->bh_old[i]; | |
9380 | } | |
9381 | - if (sh->bh_old[i]) { | |
9382 | - xor_block(sh->bh_copy[pd_idx], sh->bh_old[i]); | |
9383 | - continue; | |
9384 | + if (count == MAX_XOR_BLOCKS) { | |
9385 | + xor_block(count, &bh_ptr[0]); | |
9386 | + count = 1; | |
9387 | } | |
9388 | } | |
9389 | + if (count != 1) { | |
9390 | + xor_block(count, &bh_ptr[0]); | |
9391 | + } | |
9392 | } else if (method == READ_MODIFY_WRITE) { | |
9393 | memcpy(sh->bh_copy[pd_idx]->b_data, sh->bh_old[pd_idx]->b_data, sh->size); | |
9394 | + bh_ptr[0] = sh->bh_copy[pd_idx]; | |
9395 | + count = 1; | |
9396 | for (i = 0; i < disks; i++) { | |
9397 | if (i == sh->pd_idx) | |
9398 | continue; | |
9399 | if (sh->bh_new[i] && sh->bh_old[i]) { | |
9400 | - xor_block(sh->bh_copy[pd_idx], sh->bh_copy[i]); | |
9401 | - xor_block(sh->bh_copy[pd_idx], sh->bh_old[i]); | |
9402 | - continue; | |
9403 | + bh_ptr[count++] = sh->bh_copy[i]; | |
9404 | + bh_ptr[count++] = sh->bh_old[i]; | |
9405 | + } | |
9406 | + if (count >= (MAX_XOR_BLOCKS - 1)) { | |
9407 | + xor_block(count, &bh_ptr[0]); | |
9408 | + count = 1; | |
9409 | } | |
9410 | } | |
9411 | + if (count != 1) { | |
9412 | + xor_block(count, &bh_ptr[0]); | |
9413 | + } | |
9414 | } | |
9415 | raid5_mark_buffer_uptodate(sh->bh_copy[pd_idx], 1); | |
9416 | } | |
9417 | ||
9418 | static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw) | |
9419 | { | |
9420 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9421 | + raid5_conf_t *conf = sh->raid_conf; | |
9422 | struct buffer_head *bh_req; | |
9423 | ||
9424 | if (sh->bh_new[dd_idx]) { | |
9425 | @@ -860,19 +899,22 @@ | |
9426 | if (sh->phase == PHASE_COMPLETE && sh->cmd == STRIPE_NONE) { | |
9427 | sh->phase = PHASE_BEGIN; | |
9428 | sh->cmd = (rw == READ) ? STRIPE_READ : STRIPE_WRITE; | |
9429 | - raid_conf->nr_pending_stripes++; | |
9430 | - atomic_inc(&raid_conf->nr_handle); | |
9431 | + conf->nr_pending_stripes++; | |
9432 | + atomic_inc(&conf->nr_handle); | |
9433 | } | |
9434 | sh->bh_new[dd_idx] = bh; | |
9435 | sh->bh_req[dd_idx] = bh_req; | |
9436 | sh->cmd_new[dd_idx] = rw; | |
9437 | sh->new[dd_idx] = 1; | |
9438 | + | |
9439 | + if (buffer_lowprio(bh)) | |
9440 | + mark_buffer_lowprio(bh_req); | |
9441 | } | |
9442 | ||
9443 | static void complete_stripe(struct stripe_head *sh) | |
9444 | { | |
9445 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9446 | - int disks = raid_conf->raid_disks; | |
9447 | + raid5_conf_t *conf = sh->raid_conf; | |
9448 | + int disks = conf->raid_disks; | |
9449 | int i, new = 0; | |
9450 | ||
9451 | PRINTK(("complete_stripe %lu\n", sh->sector)); | |
9452 | @@ -909,6 +951,22 @@ | |
9453 | } | |
9454 | } | |
9455 | ||
9456 | + | |
9457 | +static int is_stripe_lowprio(struct stripe_head *sh, int disks) | |
9458 | +{ | |
9459 | + int i, lowprio = 1; | |
9460 | + | |
9461 | + for (i = 0; i < disks; i++) { | |
9462 | + if (sh->bh_new[i]) | |
9463 | + if (!buffer_lowprio(sh->bh_new[i])) | |
9464 | + lowprio = 0; | |
9465 | + if (sh->bh_old[i]) | |
9466 | + if (!buffer_lowprio(sh->bh_old[i])) | |
9467 | + lowprio = 0; | |
9468 | + } | |
9469 | + return lowprio; | |
9470 | +} | |
9471 | + | |
9472 | /* | |
9473 | * handle_stripe() is our main logic routine. Note that: | |
9474 | * | |
9475 | @@ -919,28 +977,27 @@ | |
9476 | * 2. We should be careful to set sh->nr_pending whenever we sleep, | |
9477 | * to prevent re-entry of handle_stripe() for the same sh. | |
9478 | * | |
9479 | - * 3. raid_conf->failed_disks and disk->operational can be changed | |
9480 | + * 3. conf->failed_disks and disk->operational can be changed | |
9481 | * from an interrupt. This complicates things a bit, but it allows | |
9482 | * us to stop issuing requests for a failed drive as soon as possible. | |
9483 | */ | |
9484 | static void handle_stripe(struct stripe_head *sh) | |
9485 | { | |
9486 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9487 | - struct md_dev *mddev = raid_conf->mddev; | |
9488 | - int minor = (int) (mddev - md_dev); | |
9489 | + raid5_conf_t *conf = sh->raid_conf; | |
9490 | + mddev_t *mddev = conf->mddev; | |
9491 | struct buffer_head *bh; | |
9492 | - int disks = raid_conf->raid_disks; | |
9493 | - int i, nr = 0, nr_read = 0, nr_write = 0; | |
9494 | + int disks = conf->raid_disks; | |
9495 | + int i, nr = 0, nr_read = 0, nr_write = 0, lowprio; | |
9496 | int nr_cache = 0, nr_cache_other = 0, nr_cache_overwrite = 0, parity = 0; | |
9497 | int nr_failed_other = 0, nr_failed_overwrite = 0, parity_failed = 0; | |
9498 | int reading = 0, nr_writing = 0; | |
9499 | int method1 = INT_MAX, method2 = INT_MAX; | |
9500 | int block; | |
9501 | unsigned long flags; | |
9502 | - int operational[MD_SB_DISKS], failed_disks = raid_conf->failed_disks; | |
9503 | + int operational[MD_SB_DISKS], failed_disks = conf->failed_disks; | |
9504 | ||
9505 | PRINTK(("handle_stripe(), stripe %lu\n", sh->sector)); | |
9506 | - if (sh->nr_pending) { | |
9507 | + if (md_atomic_read(&sh->nr_pending)) { | |
9508 | printk("handle_stripe(), stripe %lu, io still pending\n", sh->sector); | |
9509 | return; | |
9510 | } | |
9511 | @@ -949,9 +1006,9 @@ | |
9512 | return; | |
9513 | } | |
9514 | ||
9515 | - atomic_dec(&raid_conf->nr_handle); | |
9516 | + atomic_dec(&conf->nr_handle); | |
9517 | ||
9518 | - if (test_and_clear_bit(STRIPE_ERROR, &sh->state)) { | |
9519 | + if (md_test_and_clear_bit(STRIPE_ERROR, &sh->state)) { | |
9520 | printk("raid5: restarting stripe %lu\n", sh->sector); | |
9521 | sh->phase = PHASE_BEGIN; | |
9522 | } | |
9523 | @@ -969,11 +1026,11 @@ | |
9524 | save_flags(flags); | |
9525 | cli(); | |
9526 | for (i = 0; i < disks; i++) { | |
9527 | - operational[i] = raid_conf->disks[i].operational; | |
9528 | - if (i == sh->pd_idx && raid_conf->resync_parity) | |
9529 | + operational[i] = conf->disks[i].operational; | |
9530 | + if (i == sh->pd_idx && conf->resync_parity) | |
9531 | operational[i] = 0; | |
9532 | } | |
9533 | - failed_disks = raid_conf->failed_disks; | |
9534 | + failed_disks = conf->failed_disks; | |
9535 | restore_flags(flags); | |
9536 | ||
9537 | if (failed_disks > 1) { | |
9538 | @@ -1017,7 +1074,7 @@ | |
9539 | } | |
9540 | ||
9541 | if (nr_write && nr_read) | |
9542 | - printk("raid5: bug, nr_write == %d, nr_read == %d, sh->cmd == %d\n", nr_write, nr_read, sh->cmd); | |
9543 | + printk("raid5: bug, nr_write ==`%d, nr_read == %d, sh->cmd == %d\n", nr_write, nr_read, sh->cmd); | |
9544 | ||
9545 | if (nr_write) { | |
9546 | /* | |
9547 | @@ -1030,7 +1087,7 @@ | |
9548 | if (sh->bh_new[i]) | |
9549 | continue; | |
9550 | block = (int) compute_blocknr(sh, i); | |
9551 | - bh = find_buffer(MKDEV(MD_MAJOR, minor), block, sh->size); | |
9552 | + bh = find_buffer(mddev_to_kdev(mddev), block, sh->size); | |
9553 | if (bh && bh->b_count == 0 && buffer_dirty(bh) && !buffer_locked(bh)) { | |
9554 | PRINTK(("Whee.. sector %lu, index %d (%d) found in the buffer cache!\n", sh->sector, i, block)); | |
9555 | add_stripe_bh(sh, bh, i, WRITE); | |
9556 | @@ -1064,21 +1121,22 @@ | |
9557 | ||
9558 | if (!method1 || !method2) { | |
9559 | lock_stripe(sh); | |
9560 | - sh->nr_pending++; | |
9561 | + lowprio = is_stripe_lowprio(sh, disks); | |
9562 | + atomic_inc(&sh->nr_pending); | |
9563 | sh->phase = PHASE_WRITE; | |
9564 | compute_parity(sh, method1 <= method2 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); | |
9565 | for (i = 0; i < disks; i++) { | |
9566 | - if (!operational[i] && !raid_conf->spare && !raid_conf->resync_parity) | |
9567 | + if (!operational[i] && !conf->spare && !conf->resync_parity) | |
9568 | continue; | |
9569 | if (i == sh->pd_idx || sh->bh_new[i]) | |
9570 | nr_writing++; | |
9571 | } | |
9572 | ||
9573 | - sh->nr_pending = nr_writing; | |
9574 | - PRINTK(("handle_stripe() %lu, writing back %d\n", sh->sector, sh->nr_pending)); | |
9575 | + md_atomic_set(&sh->nr_pending, nr_writing); | |
9576 | + PRINTK(("handle_stripe() %lu, writing back %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
9577 | ||
9578 | for (i = 0; i < disks; i++) { | |
9579 | - if (!operational[i] && !raid_conf->spare && !raid_conf->resync_parity) | |
9580 | + if (!operational[i] && !conf->spare && !conf->resync_parity) | |
9581 | continue; | |
9582 | bh = sh->bh_copy[i]; | |
9583 | if (i != sh->pd_idx && ((bh == NULL) ^ (sh->bh_new[i] == NULL))) | |
9584 | @@ -1089,18 +1147,30 @@ | |
9585 | bh->b_state |= (1<<BH_Dirty); | |
9586 | PRINTK(("making request for buffer %d\n", i)); | |
9587 | clear_bit(BH_Lock, &bh->b_state); | |
9588 | - if (!operational[i] && !raid_conf->resync_parity) { | |
9589 | - bh->b_rdev = raid_conf->spare->dev; | |
9590 | - make_request(MAJOR(raid_conf->spare->dev), WRITE, bh); | |
9591 | - } else | |
9592 | - make_request(MAJOR(raid_conf->disks[i].dev), WRITE, bh); | |
9593 | + if (!operational[i] && !conf->resync_parity) { | |
9594 | + bh->b_rdev = conf->spare->dev; | |
9595 | + make_request(MAJOR(conf->spare->dev), WRITE, bh); | |
9596 | + } else { | |
9597 | +#if 0 | |
9598 | + make_request(MAJOR(conf->disks[i].dev), WRITE, bh); | |
9599 | +#else | |
9600 | + if (!lowprio || (i==sh->pd_idx)) | |
9601 | + make_request(MAJOR(conf->disks[i].dev), WRITE, bh); | |
9602 | + else { | |
9603 | + mark_buffer_clean(bh); | |
9604 | + raid5_end_request(bh,1); | |
9605 | + sh->new[i] = 0; | |
9606 | + } | |
9607 | +#endif | |
9608 | + } | |
9609 | } | |
9610 | } | |
9611 | return; | |
9612 | } | |
9613 | ||
9614 | lock_stripe(sh); | |
9615 | - sh->nr_pending++; | |
9616 | + lowprio = is_stripe_lowprio(sh, disks); | |
9617 | + atomic_inc(&sh->nr_pending); | |
9618 | if (method1 < method2) { | |
9619 | sh->write_method = RECONSTRUCT_WRITE; | |
9620 | for (i = 0; i < disks; i++) { | |
9621 | @@ -1110,6 +1180,8 @@ | |
9622 | continue; | |
9623 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
9624 | raid5_build_block(sh, sh->bh_old[i], i); | |
9625 | + if (lowprio) | |
9626 | + mark_buffer_lowprio(sh->bh_old[i]); | |
9627 | reading++; | |
9628 | } | |
9629 | } else { | |
9630 | @@ -1121,19 +1193,21 @@ | |
9631 | continue; | |
9632 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
9633 | raid5_build_block(sh, sh->bh_old[i], i); | |
9634 | + if (lowprio) | |
9635 | + mark_buffer_lowprio(sh->bh_old[i]); | |
9636 | reading++; | |
9637 | } | |
9638 | } | |
9639 | sh->phase = PHASE_READ_OLD; | |
9640 | - sh->nr_pending = reading; | |
9641 | - PRINTK(("handle_stripe() %lu, reading %d old buffers\n", sh->sector, sh->nr_pending)); | |
9642 | + md_atomic_set(&sh->nr_pending, reading); | |
9643 | + PRINTK(("handle_stripe() %lu, reading %d old buffers\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
9644 | for (i = 0; i < disks; i++) { | |
9645 | if (!sh->bh_old[i]) | |
9646 | continue; | |
9647 | if (buffer_uptodate(sh->bh_old[i])) | |
9648 | continue; | |
9649 | clear_bit(BH_Lock, &sh->bh_old[i]->b_state); | |
9650 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_old[i]); | |
9651 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_old[i]); | |
9652 | } | |
9653 | } else { | |
9654 | /* | |
9655 | @@ -1141,7 +1215,8 @@ | |
9656 | */ | |
9657 | method1 = nr_read - nr_cache_overwrite; | |
9658 | lock_stripe(sh); | |
9659 | - sh->nr_pending++; | |
9660 | + lowprio = is_stripe_lowprio(sh,disks); | |
9661 | + atomic_inc(&sh->nr_pending); | |
9662 | ||
9663 | PRINTK(("handle_stripe(), sector %lu, nr_read %d, nr_cache %d, method1 %d\n", sh->sector, nr_read, nr_cache, method1)); | |
9664 | if (!method1 || (method1 == 1 && nr_cache == disks - 1)) { | |
9665 | @@ -1149,18 +1224,22 @@ | |
9666 | for (i = 0; i < disks; i++) { | |
9667 | if (!sh->bh_new[i]) | |
9668 | continue; | |
9669 | - if (!sh->bh_old[i]) | |
9670 | + if (!sh->bh_old[i]) { | |
9671 | compute_block(sh, i); | |
9672 | + if (lowprio) | |
9673 | + mark_buffer_lowprio | |
9674 | + (sh->bh_old[i]); | |
9675 | + } | |
9676 | memcpy(sh->bh_new[i]->b_data, sh->bh_old[i]->b_data, sh->size); | |
9677 | } | |
9678 | - sh->nr_pending--; | |
9679 | + atomic_dec(&sh->nr_pending); | |
9680 | complete_stripe(sh); | |
9681 | return; | |
9682 | } | |
9683 | if (nr_failed_overwrite) { | |
9684 | sh->phase = PHASE_READ_OLD; | |
9685 | - sh->nr_pending = (disks - 1) - nr_cache; | |
9686 | - PRINTK(("handle_stripe() %lu, phase READ_OLD, pending %d\n", sh->sector, sh->nr_pending)); | |
9687 | + md_atomic_set(&sh->nr_pending, (disks - 1) - nr_cache); | |
9688 | + PRINTK(("handle_stripe() %lu, phase READ_OLD, pending %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
9689 | for (i = 0; i < disks; i++) { | |
9690 | if (sh->bh_old[i]) | |
9691 | continue; | |
9692 | @@ -1168,13 +1247,16 @@ | |
9693 | continue; | |
9694 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
9695 | raid5_build_block(sh, sh->bh_old[i], i); | |
9696 | + if (lowprio) | |
9697 | + mark_buffer_lowprio(sh->bh_old[i]); | |
9698 | clear_bit(BH_Lock, &sh->bh_old[i]->b_state); | |
9699 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_old[i]); | |
9700 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_old[i]); | |
9701 | } | |
9702 | } else { | |
9703 | sh->phase = PHASE_READ; | |
9704 | - sh->nr_pending = nr_read - nr_cache_overwrite; | |
9705 | - PRINTK(("handle_stripe() %lu, phase READ, pending %d\n", sh->sector, sh->nr_pending)); | |
9706 | + md_atomic_set(&sh->nr_pending, | |
9707 | + nr_read - nr_cache_overwrite); | |
9708 | + PRINTK(("handle_stripe() %lu, phase READ, pending %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
9709 | for (i = 0; i < disks; i++) { | |
9710 | if (!sh->bh_new[i]) | |
9711 | continue; | |
9712 | @@ -1182,16 +1264,16 @@ | |
9713 | memcpy(sh->bh_new[i]->b_data, sh->bh_old[i]->b_data, sh->size); | |
9714 | continue; | |
9715 | } | |
9716 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_req[i]); | |
9717 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_req[i]); | |
9718 | } | |
9719 | } | |
9720 | } | |
9721 | } | |
9722 | ||
9723 | -static int raid5_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) | |
9724 | +static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh) | |
9725 | { | |
9726 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
9727 | - const unsigned int raid_disks = raid_conf->raid_disks; | |
9728 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
9729 | + const unsigned int raid_disks = conf->raid_disks; | |
9730 | const unsigned int data_disks = raid_disks - 1; | |
9731 | unsigned int dd_idx, pd_idx; | |
9732 | unsigned long new_sector; | |
9733 | @@ -1202,15 +1284,15 @@ | |
9734 | if (rw == WRITEA) rw = WRITE; | |
9735 | ||
9736 | new_sector = raid5_compute_sector(bh->b_rsector, raid_disks, data_disks, | |
9737 | - &dd_idx, &pd_idx, raid_conf); | |
9738 | + &dd_idx, &pd_idx, conf); | |
9739 | ||
9740 | PRINTK(("raid5_make_request, sector %lu\n", new_sector)); | |
9741 | repeat: | |
9742 | - sh = get_stripe(raid_conf, new_sector, bh->b_size); | |
9743 | + sh = get_stripe(conf, new_sector, bh->b_size); | |
9744 | if ((rw == READ && sh->cmd == STRIPE_WRITE) || (rw == WRITE && sh->cmd == STRIPE_READ)) { | |
9745 | PRINTK(("raid5: lock contention, rw == %d, sh->cmd == %d\n", rw, sh->cmd)); | |
9746 | lock_stripe(sh); | |
9747 | - if (!sh->nr_pending) | |
9748 | + if (!md_atomic_read(&sh->nr_pending)) | |
9749 | handle_stripe(sh); | |
9750 | goto repeat; | |
9751 | } | |
9752 | @@ -1221,24 +1303,24 @@ | |
9753 | printk("raid5: bug: stripe->bh_new[%d], sector %lu exists\n", dd_idx, sh->sector); | |
9754 | printk("raid5: bh %p, bh_new %p\n", bh, sh->bh_new[dd_idx]); | |
9755 | lock_stripe(sh); | |
9756 | - md_wakeup_thread(raid_conf->thread); | |
9757 | + md_wakeup_thread(conf->thread); | |
9758 | wait_on_stripe(sh); | |
9759 | goto repeat; | |
9760 | } | |
9761 | add_stripe_bh(sh, bh, dd_idx, rw); | |
9762 | ||
9763 | - md_wakeup_thread(raid_conf->thread); | |
9764 | + md_wakeup_thread(conf->thread); | |
9765 | return 0; | |
9766 | } | |
9767 | ||
9768 | static void unplug_devices(struct stripe_head *sh) | |
9769 | { | |
9770 | #if 0 | |
9771 | - struct raid5_data *raid_conf = sh->raid_conf; | |
9772 | + raid5_conf_t *conf = sh->raid_conf; | |
9773 | int i; | |
9774 | ||
9775 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
9776 | - unplug_device(blk_dev + MAJOR(raid_conf->disks[i].dev)); | |
9777 | + for (i = 0; i < conf->raid_disks; i++) | |
9778 | + unplug_device(blk_dev + MAJOR(conf->disks[i].dev)); | |
9779 | #endif | |
9780 | } | |
9781 | ||
9782 | @@ -1252,8 +1334,8 @@ | |
9783 | static void raid5d (void *data) | |
9784 | { | |
9785 | struct stripe_head *sh; | |
9786 | - struct raid5_data *raid_conf = data; | |
9787 | - struct md_dev *mddev = raid_conf->mddev; | |
9788 | + raid5_conf_t *conf = data; | |
9789 | + mddev_t *mddev = conf->mddev; | |
9790 | int i, handled = 0, unplug = 0; | |
9791 | unsigned long flags; | |
9792 | ||
9793 | @@ -1261,47 +1343,47 @@ | |
9794 | ||
9795 | if (mddev->sb_dirty) { | |
9796 | mddev->sb_dirty = 0; | |
9797 | - md_update_sb((int) (mddev - md_dev)); | |
9798 | + md_update_sb(mddev); | |
9799 | } | |
9800 | for (i = 0; i < NR_HASH; i++) { | |
9801 | repeat: | |
9802 | - sh = raid_conf->stripe_hashtbl[i]; | |
9803 | + sh = conf->stripe_hashtbl[i]; | |
9804 | for (; sh; sh = sh->hash_next) { | |
9805 | - if (sh->raid_conf != raid_conf) | |
9806 | + if (sh->raid_conf != conf) | |
9807 | continue; | |
9808 | if (sh->phase == PHASE_COMPLETE) | |
9809 | continue; | |
9810 | - if (sh->nr_pending) | |
9811 | + if (md_atomic_read(&sh->nr_pending)) | |
9812 | continue; | |
9813 | - if (sh->sector == raid_conf->next_sector) { | |
9814 | - raid_conf->sector_count += (sh->size >> 9); | |
9815 | - if (raid_conf->sector_count >= 128) | |
9816 | + if (sh->sector == conf->next_sector) { | |
9817 | + conf->sector_count += (sh->size >> 9); | |
9818 | + if (conf->sector_count >= 128) | |
9819 | unplug = 1; | |
9820 | } else | |
9821 | unplug = 1; | |
9822 | if (unplug) { | |
9823 | - PRINTK(("unplugging devices, sector == %lu, count == %d\n", sh->sector, raid_conf->sector_count)); | |
9824 | + PRINTK(("unplugging devices, sector == %lu, count == %d\n", sh->sector, conf->sector_count)); | |
9825 | unplug_devices(sh); | |
9826 | unplug = 0; | |
9827 | - raid_conf->sector_count = 0; | |
9828 | + conf->sector_count = 0; | |
9829 | } | |
9830 | - raid_conf->next_sector = sh->sector + (sh->size >> 9); | |
9831 | + conf->next_sector = sh->sector + (sh->size >> 9); | |
9832 | handled++; | |
9833 | handle_stripe(sh); | |
9834 | goto repeat; | |
9835 | } | |
9836 | } | |
9837 | - if (raid_conf) { | |
9838 | - PRINTK(("%d stripes handled, nr_handle %d\n", handled, atomic_read(&raid_conf->nr_handle))); | |
9839 | + if (conf) { | |
9840 | + PRINTK(("%d stripes handled, nr_handle %d\n", handled, md_atomic_read(&conf->nr_handle))); | |
9841 | save_flags(flags); | |
9842 | cli(); | |
9843 | - if (!atomic_read(&raid_conf->nr_handle)) | |
9844 | - clear_bit(THREAD_WAKEUP, &raid_conf->thread->flags); | |
9845 | + if (!md_atomic_read(&conf->nr_handle)) | |
9846 | + clear_bit(THREAD_WAKEUP, &conf->thread->flags); | |
9847 | + restore_flags(flags); | |
9848 | } | |
9849 | PRINTK(("--- raid5d inactive\n")); | |
9850 | } | |
9851 | ||
9852 | -#if SUPPORT_RECONSTRUCTION | |
9853 | /* | |
9854 | * Private kernel thread for parity reconstruction after an unclean | |
9855 | * shutdown. Reconstruction on spare drives in case of a failed drive | |
9856 | @@ -1309,44 +1391,64 @@ | |
9857 | */ | |
9858 | static void raid5syncd (void *data) | |
9859 | { | |
9860 | - struct raid5_data *raid_conf = data; | |
9861 | - struct md_dev *mddev = raid_conf->mddev; | |
9862 | + raid5_conf_t *conf = data; | |
9863 | + mddev_t *mddev = conf->mddev; | |
9864 | ||
9865 | - if (!raid_conf->resync_parity) | |
9866 | + if (!conf->resync_parity) | |
9867 | + return; | |
9868 | + if (conf->resync_parity == 2) | |
9869 | + return; | |
9870 | + down(&mddev->recovery_sem); | |
9871 | + if (md_do_sync(mddev,NULL)) { | |
9872 | + up(&mddev->recovery_sem); | |
9873 | + printk("raid5: resync aborted!\n"); | |
9874 | return; | |
9875 | - md_do_sync(mddev); | |
9876 | - raid_conf->resync_parity = 0; | |
9877 | + } | |
9878 | + conf->resync_parity = 0; | |
9879 | + up(&mddev->recovery_sem); | |
9880 | + printk("raid5: resync finished.\n"); | |
9881 | } | |
9882 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
9883 | ||
9884 | -static int __check_consistency (struct md_dev *mddev, int row) | |
9885 | +static int __check_consistency (mddev_t *mddev, int row) | |
9886 | { | |
9887 | - struct raid5_data *raid_conf = mddev->private; | |
9888 | + raid5_conf_t *conf = mddev->private; | |
9889 | kdev_t dev; | |
9890 | struct buffer_head *bh[MD_SB_DISKS], tmp; | |
9891 | - int i, rc = 0, nr = 0; | |
9892 | + int i, rc = 0, nr = 0, count; | |
9893 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
9894 | ||
9895 | - if (raid_conf->working_disks != raid_conf->raid_disks) | |
9896 | + if (conf->working_disks != conf->raid_disks) | |
9897 | return 0; | |
9898 | tmp.b_size = 4096; | |
9899 | if ((tmp.b_data = (char *) get_free_page(GFP_KERNEL)) == NULL) | |
9900 | return 0; | |
9901 | + md_clear_page((unsigned long)tmp.b_data); | |
9902 | memset(bh, 0, MD_SB_DISKS * sizeof(struct buffer_head *)); | |
9903 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
9904 | - dev = raid_conf->disks[i].dev; | |
9905 | + for (i = 0; i < conf->raid_disks; i++) { | |
9906 | + dev = conf->disks[i].dev; | |
9907 | set_blocksize(dev, 4096); | |
9908 | if ((bh[i] = bread(dev, row / 4, 4096)) == NULL) | |
9909 | break; | |
9910 | nr++; | |
9911 | } | |
9912 | - if (nr == raid_conf->raid_disks) { | |
9913 | - for (i = 1; i < nr; i++) | |
9914 | - xor_block(&tmp, bh[i]); | |
9915 | + if (nr == conf->raid_disks) { | |
9916 | + bh_ptr[0] = &tmp; | |
9917 | + count = 1; | |
9918 | + for (i = 1; i < nr; i++) { | |
9919 | + bh_ptr[count++] = bh[i]; | |
9920 | + if (count == MAX_XOR_BLOCKS) { | |
9921 | + xor_block(count, &bh_ptr[0]); | |
9922 | + count = 1; | |
9923 | + } | |
9924 | + } | |
9925 | + if (count != 1) { | |
9926 | + xor_block(count, &bh_ptr[0]); | |
9927 | + } | |
9928 | if (memcmp(tmp.b_data, bh[0]->b_data, 4096)) | |
9929 | rc = 1; | |
9930 | } | |
9931 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
9932 | - dev = raid_conf->disks[i].dev; | |
9933 | + for (i = 0; i < conf->raid_disks; i++) { | |
9934 | + dev = conf->disks[i].dev; | |
9935 | if (bh[i]) { | |
9936 | bforget(bh[i]); | |
9937 | bh[i] = NULL; | |
9938 | @@ -1358,285 +1460,607 @@ | |
9939 | return rc; | |
9940 | } | |
9941 | ||
9942 | -static int check_consistency (struct md_dev *mddev) | |
9943 | +static int check_consistency (mddev_t *mddev) | |
9944 | { | |
9945 | - int size = mddev->sb->size; | |
9946 | - int row; | |
9947 | + if (__check_consistency(mddev, 0)) | |
9948 | +/* | |
9949 | + * We are not checking this currently, as it's legitimate to have | |
9950 | + * an inconsistent array, at creation time. | |
9951 | + */ | |
9952 | + return 0; | |
9953 | ||
9954 | - for (row = 0; row < size; row += size / 8) | |
9955 | - if (__check_consistency(mddev, row)) | |
9956 | - return 1; | |
9957 | return 0; | |
9958 | } | |
9959 | ||
9960 | -static int raid5_run (int minor, struct md_dev *mddev) | |
9961 | +static int raid5_run (mddev_t *mddev) | |
9962 | { | |
9963 | - struct raid5_data *raid_conf; | |
9964 | + raid5_conf_t *conf; | |
9965 | int i, j, raid_disk, memory; | |
9966 | - md_superblock_t *sb = mddev->sb; | |
9967 | - md_descriptor_t *descriptor; | |
9968 | - struct real_dev *realdev; | |
9969 | + mdp_super_t *sb = mddev->sb; | |
9970 | + mdp_disk_t *desc; | |
9971 | + mdk_rdev_t *rdev; | |
9972 | + struct disk_info *disk; | |
9973 | + struct md_list_head *tmp; | |
9974 | + int start_recovery = 0; | |
9975 | ||
9976 | MOD_INC_USE_COUNT; | |
9977 | ||
9978 | if (sb->level != 5 && sb->level != 4) { | |
9979 | - printk("raid5: %s: raid level not set to 4/5 (%d)\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->level); | |
9980 | + printk("raid5: md%d: raid level not set to 4/5 (%d)\n", mdidx(mddev), sb->level); | |
9981 | MOD_DEC_USE_COUNT; | |
9982 | return -EIO; | |
9983 | } | |
9984 | ||
9985 | - mddev->private = kmalloc (sizeof (struct raid5_data), GFP_KERNEL); | |
9986 | - if ((raid_conf = mddev->private) == NULL) | |
9987 | + mddev->private = kmalloc (sizeof (raid5_conf_t), GFP_KERNEL); | |
9988 | + if ((conf = mddev->private) == NULL) | |
9989 | goto abort; | |
9990 | - memset (raid_conf, 0, sizeof (*raid_conf)); | |
9991 | - raid_conf->mddev = mddev; | |
9992 | + memset (conf, 0, sizeof (*conf)); | |
9993 | + conf->mddev = mddev; | |
9994 | ||
9995 | - if ((raid_conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) | |
9996 | + if ((conf->stripe_hashtbl = (struct stripe_head **) md__get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) | |
9997 | goto abort; | |
9998 | - memset(raid_conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); | |
9999 | + memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); | |
10000 | ||
10001 | - init_waitqueue(&raid_conf->wait_for_stripe); | |
10002 | - PRINTK(("raid5_run(%d) called.\n", minor)); | |
10003 | - | |
10004 | - for (i = 0; i < mddev->nb_dev; i++) { | |
10005 | - realdev = &mddev->devices[i]; | |
10006 | - if (!realdev->sb) { | |
10007 | - printk(KERN_ERR "raid5: disabled device %s (couldn't access raid superblock)\n", kdevname(realdev->dev)); | |
10008 | - continue; | |
10009 | - } | |
10010 | + init_waitqueue(&conf->wait_for_stripe); | |
10011 | + PRINTK(("raid5_run(md%d) called.\n", mdidx(mddev))); | |
10012 | ||
10013 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
10014 | /* | |
10015 | * This is important -- we are using the descriptor on | |
10016 | * the disk only to get a pointer to the descriptor on | |
10017 | * the main superblock, which might be more recent. | |
10018 | */ | |
10019 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
10020 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) { | |
10021 | - printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", kdevname(realdev->dev)); | |
10022 | + desc = sb->disks + rdev->desc_nr; | |
10023 | + raid_disk = desc->raid_disk; | |
10024 | + disk = conf->disks + raid_disk; | |
10025 | + | |
10026 | + if (disk_faulty(desc)) { | |
10027 | + printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", partition_name(rdev->dev)); | |
10028 | + if (!rdev->faulty) { | |
10029 | + MD_BUG(); | |
10030 | + goto abort; | |
10031 | + } | |
10032 | + disk->number = desc->number; | |
10033 | + disk->raid_disk = raid_disk; | |
10034 | + disk->dev = rdev->dev; | |
10035 | + | |
10036 | + disk->operational = 0; | |
10037 | + disk->write_only = 0; | |
10038 | + disk->spare = 0; | |
10039 | + disk->used_slot = 1; | |
10040 | continue; | |
10041 | } | |
10042 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) { | |
10043 | - if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) { | |
10044 | - printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", kdevname(realdev->dev)); | |
10045 | - continue; | |
10046 | + if (disk_active(desc)) { | |
10047 | + if (!disk_sync(desc)) { | |
10048 | + printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", partition_name(rdev->dev)); | |
10049 | + MD_BUG(); | |
10050 | + goto abort; | |
10051 | } | |
10052 | - raid_disk = descriptor->raid_disk; | |
10053 | - if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) { | |
10054 | - printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", kdevname(realdev->dev)); | |
10055 | + if (raid_disk > sb->raid_disks) { | |
10056 | + printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", partition_name(rdev->dev)); | |
10057 | continue; | |
10058 | } | |
10059 | - if (raid_conf->disks[raid_disk].operational) { | |
10060 | - printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", kdevname(realdev->dev), raid_disk); | |
10061 | + if (disk->operational) { | |
10062 | + printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", partition_name(rdev->dev), raid_disk); | |
10063 | continue; | |
10064 | } | |
10065 | - printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", kdevname(realdev->dev), raid_disk); | |
10066 | + printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", partition_name(rdev->dev), raid_disk); | |
10067 | ||
10068 | - raid_conf->disks[raid_disk].number = descriptor->number; | |
10069 | - raid_conf->disks[raid_disk].raid_disk = raid_disk; | |
10070 | - raid_conf->disks[raid_disk].dev = mddev->devices[i].dev; | |
10071 | - raid_conf->disks[raid_disk].operational = 1; | |
10072 | + disk->number = desc->number; | |
10073 | + disk->raid_disk = raid_disk; | |
10074 | + disk->dev = rdev->dev; | |
10075 | + disk->operational = 1; | |
10076 | + disk->used_slot = 1; | |
10077 | ||
10078 | - raid_conf->working_disks++; | |
10079 | + conf->working_disks++; | |
10080 | } else { | |
10081 | /* | |
10082 | * Must be a spare disk .. | |
10083 | */ | |
10084 | - printk(KERN_INFO "raid5: spare disk %s\n", kdevname(realdev->dev)); | |
10085 | - raid_disk = descriptor->raid_disk; | |
10086 | - raid_conf->disks[raid_disk].number = descriptor->number; | |
10087 | - raid_conf->disks[raid_disk].raid_disk = raid_disk; | |
10088 | - raid_conf->disks[raid_disk].dev = mddev->devices [i].dev; | |
10089 | - | |
10090 | - raid_conf->disks[raid_disk].operational = 0; | |
10091 | - raid_conf->disks[raid_disk].write_only = 0; | |
10092 | - raid_conf->disks[raid_disk].spare = 1; | |
10093 | - } | |
10094 | - } | |
10095 | - raid_conf->raid_disks = sb->raid_disks; | |
10096 | - raid_conf->failed_disks = raid_conf->raid_disks - raid_conf->working_disks; | |
10097 | - raid_conf->mddev = mddev; | |
10098 | - raid_conf->chunk_size = sb->chunk_size; | |
10099 | - raid_conf->level = sb->level; | |
10100 | - raid_conf->algorithm = sb->parity_algorithm; | |
10101 | - raid_conf->max_nr_stripes = NR_STRIPES; | |
10102 | + printk(KERN_INFO "raid5: spare disk %s\n", partition_name(rdev->dev)); | |
10103 | + disk->number = desc->number; | |
10104 | + disk->raid_disk = raid_disk; | |
10105 | + disk->dev = rdev->dev; | |
10106 | ||
10107 | - if (raid_conf->working_disks != sb->raid_disks && sb->state != (1 << MD_SB_CLEAN)) { | |
10108 | - printk(KERN_ALERT "raid5: raid set %s not clean and not all disks are operational -- run ckraid\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10109 | - goto abort; | |
10110 | + disk->operational = 0; | |
10111 | + disk->write_only = 0; | |
10112 | + disk->spare = 1; | |
10113 | + disk->used_slot = 1; | |
10114 | + } | |
10115 | } | |
10116 | - if (!raid_conf->chunk_size || raid_conf->chunk_size % 4) { | |
10117 | - printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", raid_conf->chunk_size, kdevname(MKDEV(MD_MAJOR, minor))); | |
10118 | + | |
10119 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10120 | + desc = sb->disks + i; | |
10121 | + raid_disk = desc->raid_disk; | |
10122 | + disk = conf->disks + raid_disk; | |
10123 | + | |
10124 | + if (disk_faulty(desc) && (raid_disk < sb->raid_disks) && | |
10125 | + !conf->disks[raid_disk].used_slot) { | |
10126 | + | |
10127 | + disk->number = desc->number; | |
10128 | + disk->raid_disk = raid_disk; | |
10129 | + disk->dev = MKDEV(0,0); | |
10130 | + | |
10131 | + disk->operational = 0; | |
10132 | + disk->write_only = 0; | |
10133 | + disk->spare = 0; | |
10134 | + disk->used_slot = 1; | |
10135 | + } | |
10136 | + } | |
10137 | + | |
10138 | + conf->raid_disks = sb->raid_disks; | |
10139 | + /* | |
10140 | + * 0 for a fully functional array, 1 for a degraded array. | |
10141 | + */ | |
10142 | + conf->failed_disks = conf->raid_disks - conf->working_disks; | |
10143 | + conf->mddev = mddev; | |
10144 | + conf->chunk_size = sb->chunk_size; | |
10145 | + conf->level = sb->level; | |
10146 | + conf->algorithm = sb->layout; | |
10147 | + conf->max_nr_stripes = NR_STRIPES; | |
10148 | + | |
10149 | +#if 0 | |
10150 | + for (i = 0; i < conf->raid_disks; i++) { | |
10151 | + if (!conf->disks[i].used_slot) { | |
10152 | + MD_BUG(); | |
10153 | + goto abort; | |
10154 | + } | |
10155 | + } | |
10156 | +#endif | |
10157 | + if (!conf->chunk_size || conf->chunk_size % 4) { | |
10158 | + printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n", conf->chunk_size, mdidx(mddev)); | |
10159 | goto abort; | |
10160 | } | |
10161 | - if (raid_conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { | |
10162 | - printk(KERN_ERR "raid5: unsupported parity algorithm %d for %s\n", raid_conf->algorithm, kdevname(MKDEV(MD_MAJOR, minor))); | |
10163 | + if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { | |
10164 | + printk(KERN_ERR "raid5: unsupported parity algorithm %d for md%d\n", conf->algorithm, mdidx(mddev)); | |
10165 | goto abort; | |
10166 | } | |
10167 | - if (raid_conf->failed_disks > 1) { | |
10168 | - printk(KERN_ERR "raid5: not enough operational devices for %s (%d/%d failed)\n", kdevname(MKDEV(MD_MAJOR, minor)), raid_conf->failed_disks, raid_conf->raid_disks); | |
10169 | + if (conf->failed_disks > 1) { | |
10170 | + printk(KERN_ERR "raid5: not enough operational devices for md%d (%d/%d failed)\n", mdidx(mddev), conf->failed_disks, conf->raid_disks); | |
10171 | goto abort; | |
10172 | } | |
10173 | ||
10174 | - if ((sb->state & (1 << MD_SB_CLEAN)) && check_consistency(mddev)) { | |
10175 | - printk(KERN_ERR "raid5: detected raid-5 xor inconsistenty -- run ckraid\n"); | |
10176 | - sb->state |= 1 << MD_SB_ERRORS; | |
10177 | - goto abort; | |
10178 | + if (conf->working_disks != sb->raid_disks) { | |
10179 | + printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); | |
10180 | + start_recovery = 1; | |
10181 | } | |
10182 | ||
10183 | - if ((raid_conf->thread = md_register_thread(raid5d, raid_conf)) == NULL) { | |
10184 | - printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10185 | - goto abort; | |
10186 | + if (!start_recovery && (sb->state & (1 << MD_SB_CLEAN)) && | |
10187 | + check_consistency(mddev)) { | |
10188 | + printk(KERN_ERR "raid5: detected raid-5 superblock xor inconsistency -- running resync\n"); | |
10189 | + sb->state &= ~(1 << MD_SB_CLEAN); | |
10190 | } | |
10191 | ||
10192 | -#if SUPPORT_RECONSTRUCTION | |
10193 | - if ((raid_conf->resync_thread = md_register_thread(raid5syncd, raid_conf)) == NULL) { | |
10194 | - printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10195 | - goto abort; | |
10196 | + { | |
10197 | + const char * name = "raid5d"; | |
10198 | + | |
10199 | + conf->thread = md_register_thread(raid5d, conf, name); | |
10200 | + if (!conf->thread) { | |
10201 | + printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev)); | |
10202 | + goto abort; | |
10203 | + } | |
10204 | } | |
10205 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
10206 | ||
10207 | - memory = raid_conf->max_nr_stripes * (sizeof(struct stripe_head) + | |
10208 | - raid_conf->raid_disks * (sizeof(struct buffer_head) + | |
10209 | + memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | |
10210 | + conf->raid_disks * (sizeof(struct buffer_head) + | |
10211 | 2 * (sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; | |
10212 | - if (grow_stripes(raid_conf, raid_conf->max_nr_stripes, GFP_KERNEL)) { | |
10213 | + if (grow_stripes(conf, conf->max_nr_stripes, GFP_KERNEL)) { | |
10214 | printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); | |
10215 | - shrink_stripes(raid_conf, raid_conf->max_nr_stripes); | |
10216 | + shrink_stripes(conf, conf->max_nr_stripes); | |
10217 | goto abort; | |
10218 | } else | |
10219 | - printk(KERN_INFO "raid5: allocated %dkB for %s\n", memory, kdevname(MKDEV(MD_MAJOR, minor))); | |
10220 | + printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev)); | |
10221 | ||
10222 | /* | |
10223 | * Regenerate the "device is in sync with the raid set" bit for | |
10224 | * each device. | |
10225 | */ | |
10226 | - for (i = 0; i < sb->nr_disks ; i++) { | |
10227 | - sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE); | |
10228 | + for (i = 0; i < MD_SB_DISKS ; i++) { | |
10229 | + mark_disk_nonsync(sb->disks + i); | |
10230 | for (j = 0; j < sb->raid_disks; j++) { | |
10231 | - if (!raid_conf->disks[j].operational) | |
10232 | + if (!conf->disks[j].operational) | |
10233 | continue; | |
10234 | - if (sb->disks[i].number == raid_conf->disks[j].number) | |
10235 | - sb->disks[i].state |= 1 << MD_SYNC_DEVICE; | |
10236 | + if (sb->disks[i].number == conf->disks[j].number) | |
10237 | + mark_disk_sync(sb->disks + i); | |
10238 | } | |
10239 | } | |
10240 | - sb->active_disks = raid_conf->working_disks; | |
10241 | + sb->active_disks = conf->working_disks; | |
10242 | ||
10243 | if (sb->active_disks == sb->raid_disks) | |
10244 | - printk("raid5: raid level %d set %s active with %d out of %d devices, algorithm %d\n", raid_conf->level, kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks, raid_conf->algorithm); | |
10245 | + printk("raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); | |
10246 | else | |
10247 | - printk(KERN_ALERT "raid5: raid level %d set %s active with %d out of %d devices, algorithm %d\n", raid_conf->level, kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks, raid_conf->algorithm); | |
10248 | + printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); | |
10249 | + | |
10250 | + if (!start_recovery && ((sb->state & (1 << MD_SB_CLEAN))==0)) { | |
10251 | + const char * name = "raid5syncd"; | |
10252 | + | |
10253 | + conf->resync_thread = md_register_thread(raid5syncd, conf,name); | |
10254 | + if (!conf->resync_thread) { | |
10255 | + printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev)); | |
10256 | + goto abort; | |
10257 | + } | |
10258 | ||
10259 | - if ((sb->state & (1 << MD_SB_CLEAN)) == 0) { | |
10260 | - printk("raid5: raid set %s not clean; re-constructing parity\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10261 | - raid_conf->resync_parity = 1; | |
10262 | -#if SUPPORT_RECONSTRUCTION | |
10263 | - md_wakeup_thread(raid_conf->resync_thread); | |
10264 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
10265 | + printk("raid5: raid set md%d not clean; reconstructing parity\n", mdidx(mddev)); | |
10266 | + conf->resync_parity = 1; | |
10267 | + md_wakeup_thread(conf->resync_thread); | |
10268 | } | |
10269 | ||
10270 | + print_raid5_conf(conf); | |
10271 | + if (start_recovery) | |
10272 | + md_recover_arrays(); | |
10273 | + print_raid5_conf(conf); | |
10274 | + | |
10275 | /* Ok, everything is just fine now */ | |
10276 | return (0); | |
10277 | abort: | |
10278 | - if (raid_conf) { | |
10279 | - if (raid_conf->stripe_hashtbl) | |
10280 | - free_pages((unsigned long) raid_conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
10281 | - kfree(raid_conf); | |
10282 | + if (conf) { | |
10283 | + print_raid5_conf(conf); | |
10284 | + if (conf->stripe_hashtbl) | |
10285 | + free_pages((unsigned long) conf->stripe_hashtbl, | |
10286 | + HASH_PAGES_ORDER); | |
10287 | + kfree(conf); | |
10288 | } | |
10289 | mddev->private = NULL; | |
10290 | - printk(KERN_ALERT "raid5: failed to run raid set %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10291 | + printk(KERN_ALERT "raid5: failed to run raid set md%d\n", mdidx(mddev)); | |
10292 | MOD_DEC_USE_COUNT; | |
10293 | return -EIO; | |
10294 | } | |
10295 | ||
10296 | -static int raid5_stop (int minor, struct md_dev *mddev) | |
10297 | +static int raid5_stop_resync (mddev_t *mddev) | |
10298 | +{ | |
10299 | + raid5_conf_t *conf = mddev_to_conf(mddev); | |
10300 | + mdk_thread_t *thread = conf->resync_thread; | |
10301 | + | |
10302 | + if (thread) { | |
10303 | + if (conf->resync_parity) { | |
10304 | + conf->resync_parity = 2; | |
10305 | + md_interrupt_thread(thread); | |
10306 | + printk(KERN_INFO "raid5: parity resync was not fully finished, restarting next time.\n"); | |
10307 | + return 1; | |
10308 | + } | |
10309 | + return 0; | |
10310 | + } | |
10311 | + return 0; | |
10312 | +} | |
10313 | + | |
10314 | +static int raid5_restart_resync (mddev_t *mddev) | |
10315 | { | |
10316 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
10317 | + raid5_conf_t *conf = mddev_to_conf(mddev); | |
10318 | ||
10319 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes); | |
10320 | - shrink_stripes(raid_conf, raid_conf->max_nr_stripes); | |
10321 | - md_unregister_thread(raid_conf->thread); | |
10322 | -#if SUPPORT_RECONSTRUCTION | |
10323 | - md_unregister_thread(raid_conf->resync_thread); | |
10324 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
10325 | - free_pages((unsigned long) raid_conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
10326 | - kfree(raid_conf); | |
10327 | + if (conf->resync_parity) { | |
10328 | + if (!conf->resync_thread) { | |
10329 | + MD_BUG(); | |
10330 | + return 0; | |
10331 | + } | |
10332 | + printk("raid5: waking up raid5resync.\n"); | |
10333 | + conf->resync_parity = 1; | |
10334 | + md_wakeup_thread(conf->resync_thread); | |
10335 | + return 1; | |
10336 | + } else | |
10337 | + printk("raid5: no restart-resync needed.\n"); | |
10338 | + return 0; | |
10339 | +} | |
10340 | + | |
10341 | + | |
10342 | +static int raid5_stop (mddev_t *mddev) | |
10343 | +{ | |
10344 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
10345 | + | |
10346 | + shrink_stripe_cache(conf, conf->max_nr_stripes); | |
10347 | + shrink_stripes(conf, conf->max_nr_stripes); | |
10348 | + md_unregister_thread(conf->thread); | |
10349 | + if (conf->resync_thread) | |
10350 | + md_unregister_thread(conf->resync_thread); | |
10351 | + free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
10352 | + kfree(conf); | |
10353 | mddev->private = NULL; | |
10354 | MOD_DEC_USE_COUNT; | |
10355 | return 0; | |
10356 | } | |
10357 | ||
10358 | -static int raid5_status (char *page, int minor, struct md_dev *mddev) | |
10359 | +static int raid5_status (char *page, mddev_t *mddev) | |
10360 | { | |
10361 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
10362 | - md_superblock_t *sb = mddev->sb; | |
10363 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
10364 | + mdp_super_t *sb = mddev->sb; | |
10365 | int sz = 0, i; | |
10366 | ||
10367 | - sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->parity_algorithm); | |
10368 | - sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks); | |
10369 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
10370 | - sz += sprintf (page+sz, "%s", raid_conf->disks[i].operational ? "U" : "_"); | |
10371 | + sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->layout); | |
10372 | + sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, conf->working_disks); | |
10373 | + for (i = 0; i < conf->raid_disks; i++) | |
10374 | + sz += sprintf (page+sz, "%s", conf->disks[i].operational ? "U" : "_"); | |
10375 | sz += sprintf (page+sz, "]"); | |
10376 | return sz; | |
10377 | } | |
10378 | ||
10379 | -static int raid5_mark_spare(struct md_dev *mddev, md_descriptor_t *spare, int state) | |
10380 | +static void print_raid5_conf (raid5_conf_t *conf) | |
10381 | +{ | |
10382 | + int i; | |
10383 | + struct disk_info *tmp; | |
10384 | + | |
10385 | + printk("RAID5 conf printout:\n"); | |
10386 | + if (!conf) { | |
10387 | + printk("(conf==NULL)\n"); | |
10388 | + return; | |
10389 | + } | |
10390 | + printk(" --- rd:%d wd:%d fd:%d\n", conf->raid_disks, | |
10391 | + conf->working_disks, conf->failed_disks); | |
10392 | + | |
10393 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10394 | + tmp = conf->disks + i; | |
10395 | + printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", | |
10396 | + i, tmp->spare,tmp->operational, | |
10397 | + tmp->number,tmp->raid_disk,tmp->used_slot, | |
10398 | + partition_name(tmp->dev)); | |
10399 | + } | |
10400 | +} | |
10401 | + | |
10402 | +static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) | |
10403 | { | |
10404 | - int i = 0, failed_disk = -1; | |
10405 | - struct raid5_data *raid_conf = mddev->private; | |
10406 | - struct disk_info *disk = raid_conf->disks; | |
10407 | + int err = 0; | |
10408 | + int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; | |
10409 | + raid5_conf_t *conf = mddev->private; | |
10410 | + struct disk_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; | |
10411 | unsigned long flags; | |
10412 | - md_superblock_t *sb = mddev->sb; | |
10413 | - md_descriptor_t *descriptor; | |
10414 | + mdp_super_t *sb = mddev->sb; | |
10415 | + mdp_disk_t *failed_desc, *spare_desc, *added_desc; | |
10416 | ||
10417 | - for (i = 0; i < MD_SB_DISKS; i++, disk++) { | |
10418 | - if (disk->spare && disk->number == spare->number) | |
10419 | - goto found; | |
10420 | - } | |
10421 | - return 1; | |
10422 | -found: | |
10423 | - for (i = 0, disk = raid_conf->disks; i < raid_conf->raid_disks; i++, disk++) | |
10424 | - if (!disk->operational) | |
10425 | - failed_disk = i; | |
10426 | - if (failed_disk == -1) | |
10427 | - return 1; | |
10428 | save_flags(flags); | |
10429 | cli(); | |
10430 | + | |
10431 | + print_raid5_conf(conf); | |
10432 | + /* | |
10433 | + * find the disk ... | |
10434 | + */ | |
10435 | switch (state) { | |
10436 | - case SPARE_WRITE: | |
10437 | - disk->operational = 1; | |
10438 | - disk->write_only = 1; | |
10439 | - raid_conf->spare = disk; | |
10440 | - break; | |
10441 | - case SPARE_INACTIVE: | |
10442 | - disk->operational = 0; | |
10443 | - disk->write_only = 0; | |
10444 | - raid_conf->spare = NULL; | |
10445 | - break; | |
10446 | - case SPARE_ACTIVE: | |
10447 | - disk->spare = 0; | |
10448 | - disk->write_only = 0; | |
10449 | ||
10450 | - descriptor = &sb->disks[raid_conf->disks[failed_disk].number]; | |
10451 | - i = spare->raid_disk; | |
10452 | - disk->raid_disk = spare->raid_disk = descriptor->raid_disk; | |
10453 | - if (disk->raid_disk != failed_disk) | |
10454 | - printk("raid5: disk->raid_disk != failed_disk"); | |
10455 | - descriptor->raid_disk = i; | |
10456 | - | |
10457 | - raid_conf->spare = NULL; | |
10458 | - raid_conf->working_disks++; | |
10459 | - raid_conf->failed_disks--; | |
10460 | - raid_conf->disks[failed_disk] = *disk; | |
10461 | - break; | |
10462 | - default: | |
10463 | - printk("raid5_mark_spare: bug: state == %d\n", state); | |
10464 | - restore_flags(flags); | |
10465 | - return 1; | |
10466 | + case DISKOP_SPARE_ACTIVE: | |
10467 | + | |
10468 | + /* | |
10469 | + * Find the failed disk within the RAID5 configuration ... | |
10470 | + * (this can only be in the first conf->raid_disks part) | |
10471 | + */ | |
10472 | + for (i = 0; i < conf->raid_disks; i++) { | |
10473 | + tmp = conf->disks + i; | |
10474 | + if ((!tmp->operational && !tmp->spare) || | |
10475 | + !tmp->used_slot) { | |
10476 | + failed_disk = i; | |
10477 | + break; | |
10478 | + } | |
10479 | + } | |
10480 | + /* | |
10481 | + * When we activate a spare disk we _must_ have a disk in | |
10482 | + * the lower (active) part of the array to replace. | |
10483 | + */ | |
10484 | + if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) { | |
10485 | + MD_BUG(); | |
10486 | + err = 1; | |
10487 | + goto abort; | |
10488 | + } | |
10489 | + /* fall through */ | |
10490 | + | |
10491 | + case DISKOP_SPARE_WRITE: | |
10492 | + case DISKOP_SPARE_INACTIVE: | |
10493 | + | |
10494 | + /* | |
10495 | + * Find the spare disk ... (can only be in the 'high' | |
10496 | + * area of the array) | |
10497 | + */ | |
10498 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
10499 | + tmp = conf->disks + i; | |
10500 | + if (tmp->spare && tmp->number == (*d)->number) { | |
10501 | + spare_disk = i; | |
10502 | + break; | |
10503 | + } | |
10504 | + } | |
10505 | + if (spare_disk == -1) { | |
10506 | + MD_BUG(); | |
10507 | + err = 1; | |
10508 | + goto abort; | |
10509 | + } | |
10510 | + break; | |
10511 | + | |
10512 | + case DISKOP_HOT_REMOVE_DISK: | |
10513 | + | |
10514 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10515 | + tmp = conf->disks + i; | |
10516 | + if (tmp->used_slot && (tmp->number == (*d)->number)) { | |
10517 | + if (tmp->operational) { | |
10518 | + err = -EBUSY; | |
10519 | + goto abort; | |
10520 | + } | |
10521 | + removed_disk = i; | |
10522 | + break; | |
10523 | + } | |
10524 | + } | |
10525 | + if (removed_disk == -1) { | |
10526 | + MD_BUG(); | |
10527 | + err = 1; | |
10528 | + goto abort; | |
10529 | + } | |
10530 | + break; | |
10531 | + | |
10532 | + case DISKOP_HOT_ADD_DISK: | |
10533 | + | |
10534 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
10535 | + tmp = conf->disks + i; | |
10536 | + if (!tmp->used_slot) { | |
10537 | + added_disk = i; | |
10538 | + break; | |
10539 | + } | |
10540 | + } | |
10541 | + if (added_disk == -1) { | |
10542 | + MD_BUG(); | |
10543 | + err = 1; | |
10544 | + goto abort; | |
10545 | + } | |
10546 | + break; | |
10547 | + } | |
10548 | + | |
10549 | + switch (state) { | |
10550 | + /* | |
10551 | + * Switch the spare disk to write-only mode: | |
10552 | + */ | |
10553 | + case DISKOP_SPARE_WRITE: | |
10554 | + if (conf->spare) { | |
10555 | + MD_BUG(); | |
10556 | + err = 1; | |
10557 | + goto abort; | |
10558 | + } | |
10559 | + sdisk = conf->disks + spare_disk; | |
10560 | + sdisk->operational = 1; | |
10561 | + sdisk->write_only = 1; | |
10562 | + conf->spare = sdisk; | |
10563 | + break; | |
10564 | + /* | |
10565 | + * Deactivate a spare disk: | |
10566 | + */ | |
10567 | + case DISKOP_SPARE_INACTIVE: | |
10568 | + sdisk = conf->disks + spare_disk; | |
10569 | + sdisk->operational = 0; | |
10570 | + sdisk->write_only = 0; | |
10571 | + /* | |
10572 | + * Was the spare being resynced? | |
10573 | + */ | |
10574 | + if (conf->spare == sdisk) | |
10575 | + conf->spare = NULL; | |
10576 | + break; | |
10577 | + /* | |
10578 | + * Activate (mark read-write) the (now sync) spare disk, | |
10579 | + * which means we switch it's 'raid position' (->raid_disk) | |
10580 | + * with the failed disk. (only the first 'conf->raid_disks' | |
10581 | + * slots are used for 'real' disks and we must preserve this | |
10582 | + * property) | |
10583 | + */ | |
10584 | + case DISKOP_SPARE_ACTIVE: | |
10585 | + if (!conf->spare) { | |
10586 | + MD_BUG(); | |
10587 | + err = 1; | |
10588 | + goto abort; | |
10589 | + } | |
10590 | + sdisk = conf->disks + spare_disk; | |
10591 | + fdisk = conf->disks + failed_disk; | |
10592 | + | |
10593 | + spare_desc = &sb->disks[sdisk->number]; | |
10594 | + failed_desc = &sb->disks[fdisk->number]; | |
10595 | + | |
10596 | + if (spare_desc != *d) { | |
10597 | + MD_BUG(); | |
10598 | + err = 1; | |
10599 | + goto abort; | |
10600 | + } | |
10601 | + | |
10602 | + if (spare_desc->raid_disk != sdisk->raid_disk) { | |
10603 | + MD_BUG(); | |
10604 | + err = 1; | |
10605 | + goto abort; | |
10606 | + } | |
10607 | + | |
10608 | + if (sdisk->raid_disk != spare_disk) { | |
10609 | + MD_BUG(); | |
10610 | + err = 1; | |
10611 | + goto abort; | |
10612 | + } | |
10613 | + | |
10614 | + if (failed_desc->raid_disk != fdisk->raid_disk) { | |
10615 | + MD_BUG(); | |
10616 | + err = 1; | |
10617 | + goto abort; | |
10618 | + } | |
10619 | + | |
10620 | + if (fdisk->raid_disk != failed_disk) { | |
10621 | + MD_BUG(); | |
10622 | + err = 1; | |
10623 | + goto abort; | |
10624 | + } | |
10625 | + | |
10626 | + /* | |
10627 | + * do the switch finally | |
10628 | + */ | |
10629 | + xchg_values(*spare_desc, *failed_desc); | |
10630 | + xchg_values(*fdisk, *sdisk); | |
10631 | + | |
10632 | + /* | |
10633 | + * (careful, 'failed' and 'spare' are switched from now on) | |
10634 | + * | |
10635 | + * we want to preserve linear numbering and we want to | |
10636 | + * give the proper raid_disk number to the now activated | |
10637 | + * disk. (this means we switch back these values) | |
10638 | + */ | |
10639 | + | |
10640 | + xchg_values(spare_desc->raid_disk, failed_desc->raid_disk); | |
10641 | + xchg_values(sdisk->raid_disk, fdisk->raid_disk); | |
10642 | + xchg_values(spare_desc->number, failed_desc->number); | |
10643 | + xchg_values(sdisk->number, fdisk->number); | |
10644 | + | |
10645 | + *d = failed_desc; | |
10646 | + | |
10647 | + if (sdisk->dev == MKDEV(0,0)) | |
10648 | + sdisk->used_slot = 0; | |
10649 | + | |
10650 | + /* | |
10651 | + * this really activates the spare. | |
10652 | + */ | |
10653 | + fdisk->spare = 0; | |
10654 | + fdisk->write_only = 0; | |
10655 | + | |
10656 | + /* | |
10657 | + * if we activate a spare, we definitely replace a | |
10658 | + * non-operational disk slot in the 'low' area of | |
10659 | + * the disk array. | |
10660 | + */ | |
10661 | + conf->failed_disks--; | |
10662 | + conf->working_disks++; | |
10663 | + conf->spare = NULL; | |
10664 | + | |
10665 | + break; | |
10666 | + | |
10667 | + case DISKOP_HOT_REMOVE_DISK: | |
10668 | + rdisk = conf->disks + removed_disk; | |
10669 | + | |
10670 | + if (rdisk->spare && (removed_disk < conf->raid_disks)) { | |
10671 | + MD_BUG(); | |
10672 | + err = 1; | |
10673 | + goto abort; | |
10674 | + } | |
10675 | + rdisk->dev = MKDEV(0,0); | |
10676 | + rdisk->used_slot = 0; | |
10677 | + | |
10678 | + break; | |
10679 | + | |
10680 | + case DISKOP_HOT_ADD_DISK: | |
10681 | + adisk = conf->disks + added_disk; | |
10682 | + added_desc = *d; | |
10683 | + | |
10684 | + if (added_disk != added_desc->number) { | |
10685 | + MD_BUG(); | |
10686 | + err = 1; | |
10687 | + goto abort; | |
10688 | + } | |
10689 | + | |
10690 | + adisk->number = added_desc->number; | |
10691 | + adisk->raid_disk = added_desc->raid_disk; | |
10692 | + adisk->dev = MKDEV(added_desc->major,added_desc->minor); | |
10693 | + | |
10694 | + adisk->operational = 0; | |
10695 | + adisk->write_only = 0; | |
10696 | + adisk->spare = 1; | |
10697 | + adisk->used_slot = 1; | |
10698 | + | |
10699 | + | |
10700 | + break; | |
10701 | + | |
10702 | + default: | |
10703 | + MD_BUG(); | |
10704 | + err = 1; | |
10705 | + goto abort; | |
10706 | } | |
10707 | +abort: | |
10708 | restore_flags(flags); | |
10709 | - return 0; | |
10710 | + print_raid5_conf(conf); | |
10711 | + return err; | |
10712 | } | |
10713 | ||
10714 | -static struct md_personality raid5_personality= | |
10715 | +static mdk_personality_t raid5_personality= | |
10716 | { | |
10717 | "raid5", | |
10718 | raid5_map, | |
10719 | @@ -1648,14 +2072,19 @@ | |
10720 | NULL, /* no ioctls */ | |
10721 | 0, | |
10722 | raid5_error, | |
10723 | - /* raid5_hot_add_disk, */ NULL, | |
10724 | - /* raid1_hot_remove_drive */ NULL, | |
10725 | - raid5_mark_spare | |
10726 | + raid5_diskop, | |
10727 | + raid5_stop_resync, | |
10728 | + raid5_restart_resync | |
10729 | }; | |
10730 | ||
10731 | int raid5_init (void) | |
10732 | { | |
10733 | - return register_md_personality (RAID5, &raid5_personality); | |
10734 | + int err; | |
10735 | + | |
10736 | + err = register_md_personality (RAID5, &raid5_personality); | |
10737 | + if (err) | |
10738 | + return err; | |
10739 | + return 0; | |
10740 | } | |
10741 | ||
10742 | #ifdef MODULE | |
10743 | diff -ruN linux.orig/drivers/block/translucent.c linux-2.2.16/drivers/block/translucent.c | |
10744 | --- linux.orig/drivers/block/translucent.c Thu Jan 1 01:00:00 1970 | |
10745 | +++ linux-2.2.16/drivers/block/translucent.c Fri Jun 9 11:37:45 2000 | |
10746 | @@ -0,0 +1,136 @@ | |
10747 | +/* | |
10748 | + translucent.c : Translucent RAID driver for Linux | |
10749 | + Copyright (C) 1998 Ingo Molnar | |
10750 | + | |
10751 | + Translucent mode management functions. | |
10752 | + | |
10753 | + This program is free software; you can redistribute it and/or modify | |
10754 | + it under the terms of the GNU General Public License as published by | |
10755 | + the Free Software Foundation; either version 2, or (at your option) | |
10756 | + any later version. | |
10757 | + | |
10758 | + You should have received a copy of the GNU General Public License | |
10759 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
10760 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
10761 | +*/ | |
10762 | + | |
10763 | +#include <linux/module.h> | |
10764 | + | |
10765 | +#include <linux/raid/md.h> | |
10766 | +#include <linux/malloc.h> | |
10767 | + | |
10768 | +#include <linux/raid/translucent.h> | |
10769 | + | |
10770 | +#define MAJOR_NR MD_MAJOR | |
10771 | +#define MD_DRIVER | |
10772 | +#define MD_PERSONALITY | |
10773 | + | |
10774 | +static int translucent_run (mddev_t *mddev) | |
10775 | +{ | |
10776 | + translucent_conf_t *conf; | |
10777 | + mdk_rdev_t *rdev; | |
10778 | + int i; | |
10779 | + | |
10780 | + MOD_INC_USE_COUNT; | |
10781 | + | |
10782 | + conf = kmalloc (sizeof (*conf), GFP_KERNEL); | |
10783 | + if (!conf) | |
10784 | + goto out; | |
10785 | + mddev->private = conf; | |
10786 | + | |
10787 | + if (mddev->nb_dev != 2) { | |
10788 | + printk("translucent: this mode needs 2 disks, aborting!\n"); | |
10789 | + goto out; | |
10790 | + } | |
10791 | + | |
10792 | + if (md_check_ordering(mddev)) { | |
10793 | + printk("translucent: disks are not ordered, aborting!\n"); | |
10794 | + goto out; | |
10795 | + } | |
10796 | + | |
10797 | + ITERATE_RDEV_ORDERED(mddev,rdev,i) { | |
10798 | + dev_info_t *disk = conf->disks + i; | |
10799 | + | |
10800 | + disk->dev = rdev->dev; | |
10801 | + disk->size = rdev->size; | |
10802 | + } | |
10803 | + | |
10804 | + return 0; | |
10805 | + | |
10806 | +out: | |
10807 | + if (conf) | |
10808 | + kfree(conf); | |
10809 | + | |
10810 | + MOD_DEC_USE_COUNT; | |
10811 | + return 1; | |
10812 | +} | |
10813 | + | |
10814 | +static int translucent_stop (mddev_t *mddev) | |
10815 | +{ | |
10816 | + translucent_conf_t *conf = mddev_to_conf(mddev); | |
10817 | + | |
10818 | + kfree(conf); | |
10819 | + | |
10820 | + MOD_DEC_USE_COUNT; | |
10821 | + | |
10822 | + return 0; | |
10823 | +} | |
10824 | + | |
10825 | + | |
10826 | +static int translucent_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
10827 | + unsigned long *rsector, unsigned long size) | |
10828 | +{ | |
10829 | + translucent_conf_t *conf = mddev_to_conf(mddev); | |
10830 | + | |
10831 | + *rdev = conf->disks[0].dev; | |
10832 | + | |
10833 | + return 0; | |
10834 | +} | |
10835 | + | |
10836 | +static int translucent_status (char *page, mddev_t *mddev) | |
10837 | +{ | |
10838 | + int sz = 0; | |
10839 | + | |
10840 | + sz += sprintf(page+sz, " %d%% full", 10); | |
10841 | + return sz; | |
10842 | +} | |
10843 | + | |
10844 | + | |
10845 | +static mdk_personality_t translucent_personality= | |
10846 | +{ | |
10847 | + "translucent", | |
10848 | + translucent_map, | |
10849 | + NULL, | |
10850 | + NULL, | |
10851 | + translucent_run, | |
10852 | + translucent_stop, | |
10853 | + translucent_status, | |
10854 | + NULL, | |
10855 | + 0, | |
10856 | + NULL, | |
10857 | + NULL, | |
10858 | + NULL, | |
10859 | + NULL | |
10860 | +}; | |
10861 | + | |
10862 | +#ifndef MODULE | |
10863 | + | |
10864 | +md__initfunc(void translucent_init (void)) | |
10865 | +{ | |
10866 | + register_md_personality (TRANSLUCENT, &translucent_personality); | |
10867 | +} | |
10868 | + | |
10869 | +#else | |
10870 | + | |
10871 | +int init_module (void) | |
10872 | +{ | |
10873 | + return (register_md_personality (TRANSLUCENT, &translucent_personality)); | |
10874 | +} | |
10875 | + | |
10876 | +void cleanup_module (void) | |
10877 | +{ | |
10878 | + unregister_md_personality (TRANSLUCENT); | |
10879 | +} | |
10880 | + | |
10881 | +#endif | |
10882 | + | |
10883 | diff -ruN linux.orig/drivers/block/xor.c linux-2.2.16/drivers/block/xor.c | |
10884 | --- linux.orig/drivers/block/xor.c Thu Jan 1 01:00:00 1970 | |
10885 | +++ linux-2.2.16/drivers/block/xor.c Fri Jun 9 11:37:45 2000 | |
10886 | @@ -0,0 +1,1894 @@ | |
10887 | +/* | |
10888 | + * xor.c : Multiple Devices driver for Linux | |
10889 | + * | |
10890 | + * Copyright (C) 1996, 1997, 1998, 1999 Ingo Molnar, Matti Aarnio, Jakub Jelinek | |
10891 | + * | |
10892 | + * | |
10893 | + * optimized RAID-5 checksumming functions. | |
10894 | + * | |
10895 | + * This program is free software; you can redistribute it and/or modify | |
10896 | + * it under the terms of the GNU General Public License as published by | |
10897 | + * the Free Software Foundation; either version 2, or (at your option) | |
10898 | + * any later version. | |
10899 | + * | |
10900 | + * You should have received a copy of the GNU General Public License | |
10901 | + * (for example /usr/src/linux/COPYING); if not, write to the Free | |
10902 | + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
10903 | + */ | |
10904 | +#include <linux/module.h> | |
10905 | +#include <linux/raid/md.h> | |
10906 | +#ifdef __sparc_v9__ | |
10907 | +#include <asm/head.h> | |
10908 | +#include <asm/asi.h> | |
10909 | +#include <asm/visasm.h> | |
10910 | +#endif | |
10911 | + | |
10912 | +/* | |
10913 | + * we use the 'XOR function template' to register multiple xor | |
10914 | + * functions runtime. The kernel measures their speed upon bootup | |
10915 | + * and decides which one to use. (compile-time registration is | |
10916 | + * not enough as certain CPU features like MMX can only be detected | |
10917 | + * runtime) | |
10918 | + * | |
10919 | + * this architecture makes it pretty easy to add new routines | |
10920 | + * that are faster on certain CPUs, without killing other CPU's | |
10921 | + * 'native' routine. Although the current routines are belived | |
10922 | + * to be the physically fastest ones on all CPUs tested, but | |
10923 | + * feel free to prove me wrong and add yet another routine =B-) | |
10924 | + * --mingo | |
10925 | + */ | |
10926 | + | |
10927 | +#define MAX_XOR_BLOCKS 5 | |
10928 | + | |
10929 | +#define XOR_ARGS (unsigned int count, struct buffer_head **bh_ptr) | |
10930 | + | |
10931 | +typedef void (*xor_block_t) XOR_ARGS; | |
10932 | +xor_block_t xor_block = NULL; | |
10933 | + | |
10934 | +#ifndef __sparc_v9__ | |
10935 | + | |
10936 | +struct xor_block_template; | |
10937 | + | |
10938 | +struct xor_block_template { | |
10939 | + char * name; | |
10940 | + xor_block_t xor_block; | |
10941 | + int speed; | |
10942 | + struct xor_block_template * next; | |
10943 | +}; | |
10944 | + | |
10945 | +struct xor_block_template * xor_functions = NULL; | |
10946 | + | |
10947 | +#define XORBLOCK_TEMPLATE(x) \ | |
10948 | +static void xor_block_##x XOR_ARGS; \ | |
10949 | +static struct xor_block_template t_xor_block_##x = \ | |
10950 | + { #x, xor_block_##x, 0, NULL }; \ | |
10951 | +static void xor_block_##x XOR_ARGS | |
10952 | + | |
10953 | +#ifdef __i386__ | |
10954 | + | |
10955 | +#ifdef CONFIG_X86_XMM | |
10956 | +/* | |
10957 | + * Cache avoiding checksumming functions utilizing KNI instructions | |
10958 | + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) | |
10959 | + */ | |
10960 | + | |
10961 | +XORBLOCK_TEMPLATE(pIII_kni) | |
10962 | +{ | |
10963 | + char xmm_save[16*4]; | |
10964 | + int cr0; | |
10965 | + int lines = (bh_ptr[0]->b_size>>8); | |
10966 | + | |
10967 | + __asm__ __volatile__ ( | |
10968 | + "movl %%cr0,%0 ;\n\t" | |
10969 | + "clts ;\n\t" | |
10970 | + "movups %%xmm0,(%1) ;\n\t" | |
10971 | + "movups %%xmm1,0x10(%1) ;\n\t" | |
10972 | + "movups %%xmm2,0x20(%1) ;\n\t" | |
10973 | + "movups %%xmm3,0x30(%1) ;\n\t" | |
10974 | + : "=r" (cr0) | |
10975 | + : "r" (xmm_save) | |
10976 | + : "memory" ); | |
10977 | + | |
10978 | +#define OFFS(x) "8*("#x"*2)" | |
10979 | +#define PF0(x) \ | |
10980 | + " prefetcht0 "OFFS(x)"(%1) ;\n" | |
10981 | +#define LD(x,y) \ | |
10982 | + " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" | |
10983 | +#define ST(x,y) \ | |
10984 | + " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" | |
10985 | +#define PF1(x) \ | |
10986 | + " prefetchnta "OFFS(x)"(%2) ;\n" | |
10987 | +#define PF2(x) \ | |
10988 | + " prefetchnta "OFFS(x)"(%3) ;\n" | |
10989 | +#define PF3(x) \ | |
10990 | + " prefetchnta "OFFS(x)"(%4) ;\n" | |
10991 | +#define PF4(x) \ | |
10992 | + " prefetchnta "OFFS(x)"(%5) ;\n" | |
10993 | +#define PF5(x) \ | |
10994 | + " prefetchnta "OFFS(x)"(%6) ;\n" | |
10995 | +#define XO1(x,y) \ | |
10996 | + " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" | |
10997 | +#define XO2(x,y) \ | |
10998 | + " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" | |
10999 | +#define XO3(x,y) \ | |
11000 | + " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" | |
11001 | +#define XO4(x,y) \ | |
11002 | + " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" | |
11003 | +#define XO5(x,y) \ | |
11004 | + " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" | |
11005 | + | |
11006 | + switch(count) { | |
11007 | + case 2: | |
11008 | + __asm__ __volatile__ ( | |
11009 | +#undef BLOCK | |
11010 | +#define BLOCK(i) \ | |
11011 | + LD(i,0) \ | |
11012 | + LD(i+1,1) \ | |
11013 | + PF1(i) \ | |
11014 | + PF1(i+2) \ | |
11015 | + LD(i+2,2) \ | |
11016 | + LD(i+3,3) \ | |
11017 | + PF0(i+4) \ | |
11018 | + PF0(i+6) \ | |
11019 | + XO1(i,0) \ | |
11020 | + XO1(i+1,1) \ | |
11021 | + XO1(i+2,2) \ | |
11022 | + XO1(i+3,3) \ | |
11023 | + ST(i,0) \ | |
11024 | + ST(i+1,1) \ | |
11025 | + ST(i+2,2) \ | |
11026 | + ST(i+3,3) \ | |
11027 | + | |
11028 | + | |
11029 | + PF0(0) | |
11030 | + PF0(2) | |
11031 | + | |
11032 | + " .align 32,0x90 ;\n" | |
11033 | + " 1: ;\n" | |
11034 | + | |
11035 | + BLOCK(0) | |
11036 | + BLOCK(4) | |
11037 | + BLOCK(8) | |
11038 | + BLOCK(12) | |
11039 | + | |
11040 | + " addl $256, %1 ;\n" | |
11041 | + " addl $256, %2 ;\n" | |
11042 | + " decl %0 ;\n" | |
11043 | + " jnz 1b ;\n" | |
11044 | + | |
11045 | + : | |
11046 | + : "r" (lines), | |
11047 | + "r" (bh_ptr[0]->b_data), | |
11048 | + "r" (bh_ptr[1]->b_data) | |
11049 | + : "memory" ); | |
11050 | + break; | |
11051 | + case 3: | |
11052 | + __asm__ __volatile__ ( | |
11053 | +#undef BLOCK | |
11054 | +#define BLOCK(i) \ | |
11055 | + PF1(i) \ | |
11056 | + PF1(i+2) \ | |
11057 | + LD(i,0) \ | |
11058 | + LD(i+1,1) \ | |
11059 | + LD(i+2,2) \ | |
11060 | + LD(i+3,3) \ | |
11061 | + PF2(i) \ | |
11062 | + PF2(i+2) \ | |
11063 | + PF0(i+4) \ | |
11064 | + PF0(i+6) \ | |
11065 | + XO1(i,0) \ | |
11066 | + XO1(i+1,1) \ | |
11067 | + XO1(i+2,2) \ | |
11068 | + XO1(i+3,3) \ | |
11069 | + XO2(i,0) \ | |
11070 | + XO2(i+1,1) \ | |
11071 | + XO2(i+2,2) \ | |
11072 | + XO2(i+3,3) \ | |
11073 | + ST(i,0) \ | |
11074 | + ST(i+1,1) \ | |
11075 | + ST(i+2,2) \ | |
11076 | + ST(i+3,3) \ | |
11077 | + | |
11078 | + | |
11079 | + PF0(0) | |
11080 | + PF0(2) | |
11081 | + | |
11082 | + " .align 32,0x90 ;\n" | |
11083 | + " 1: ;\n" | |
11084 | + | |
11085 | + BLOCK(0) | |
11086 | + BLOCK(4) | |
11087 | + BLOCK(8) | |
11088 | + BLOCK(12) | |
11089 | + | |
11090 | + " addl $256, %1 ;\n" | |
11091 | + " addl $256, %2 ;\n" | |
11092 | + " addl $256, %3 ;\n" | |
11093 | + " decl %0 ;\n" | |
11094 | + " jnz 1b ;\n" | |
11095 | + : | |
11096 | + : "r" (lines), | |
11097 | + "r" (bh_ptr[0]->b_data), | |
11098 | + "r" (bh_ptr[1]->b_data), | |
11099 | + "r" (bh_ptr[2]->b_data) | |
11100 | + : "memory" ); | |
11101 | + break; | |
11102 | + case 4: | |
11103 | + __asm__ __volatile__ ( | |
11104 | +#undef BLOCK | |
11105 | +#define BLOCK(i) \ | |
11106 | + PF1(i) \ | |
11107 | + PF1(i+2) \ | |
11108 | + LD(i,0) \ | |
11109 | + LD(i+1,1) \ | |
11110 | + LD(i+2,2) \ | |
11111 | + LD(i+3,3) \ | |
11112 | + PF2(i) \ | |
11113 | + PF2(i+2) \ | |
11114 | + XO1(i,0) \ | |
11115 | + XO1(i+1,1) \ | |
11116 | + XO1(i+2,2) \ | |
11117 | + XO1(i+3,3) \ | |
11118 | + PF3(i) \ | |
11119 | + PF3(i+2) \ | |
11120 | + PF0(i+4) \ | |
11121 | + PF0(i+6) \ | |
11122 | + XO2(i,0) \ | |
11123 | + XO2(i+1,1) \ | |
11124 | + XO2(i+2,2) \ | |
11125 | + XO2(i+3,3) \ | |
11126 | + XO3(i,0) \ | |
11127 | + XO3(i+1,1) \ | |
11128 | + XO3(i+2,2) \ | |
11129 | + XO3(i+3,3) \ | |
11130 | + ST(i,0) \ | |
11131 | + ST(i+1,1) \ | |
11132 | + ST(i+2,2) \ | |
11133 | + ST(i+3,3) \ | |
11134 | + | |
11135 | + | |
11136 | + PF0(0) | |
11137 | + PF0(2) | |
11138 | + | |
11139 | + " .align 32,0x90 ;\n" | |
11140 | + " 1: ;\n" | |
11141 | + | |
11142 | + BLOCK(0) | |
11143 | + BLOCK(4) | |
11144 | + BLOCK(8) | |
11145 | + BLOCK(12) | |
11146 | + | |
11147 | + " addl $256, %1 ;\n" | |
11148 | + " addl $256, %2 ;\n" | |
11149 | + " addl $256, %3 ;\n" | |
11150 | + " addl $256, %4 ;\n" | |
11151 | + " decl %0 ;\n" | |
11152 | + " jnz 1b ;\n" | |
11153 | + | |
11154 | + : | |
11155 | + : "r" (lines), | |
11156 | + "r" (bh_ptr[0]->b_data), | |
11157 | + "r" (bh_ptr[1]->b_data), | |
11158 | + "r" (bh_ptr[2]->b_data), | |
11159 | + "r" (bh_ptr[3]->b_data) | |
11160 | + : "memory" ); | |
11161 | + break; | |
11162 | + case 5: | |
11163 | + __asm__ __volatile__ ( | |
11164 | +#undef BLOCK | |
11165 | +#define BLOCK(i) \ | |
11166 | + PF1(i) \ | |
11167 | + PF1(i+2) \ | |
11168 | + LD(i,0) \ | |
11169 | + LD(i+1,1) \ | |
11170 | + LD(i+2,2) \ | |
11171 | + LD(i+3,3) \ | |
11172 | + PF2(i) \ | |
11173 | + PF2(i+2) \ | |
11174 | + XO1(i,0) \ | |
11175 | + XO1(i+1,1) \ | |
11176 | + XO1(i+2,2) \ | |
11177 | + XO1(i+3,3) \ | |
11178 | + PF3(i) \ | |
11179 | + PF3(i+2) \ | |
11180 | + XO2(i,0) \ | |
11181 | + XO2(i+1,1) \ | |
11182 | + XO2(i+2,2) \ | |
11183 | + XO2(i+3,3) \ | |
11184 | + PF4(i) \ | |
11185 | + PF4(i+2) \ | |
11186 | + PF0(i+4) \ | |
11187 | + PF0(i+6) \ | |
11188 | + XO3(i,0) \ | |
11189 | + XO3(i+1,1) \ | |
11190 | + XO3(i+2,2) \ | |
11191 | + XO3(i+3,3) \ | |
11192 | + XO4(i,0) \ | |
11193 | + XO4(i+1,1) \ | |
11194 | + XO4(i+2,2) \ | |
11195 | + XO4(i+3,3) \ | |
11196 | + ST(i,0) \ | |
11197 | + ST(i+1,1) \ | |
11198 | + ST(i+2,2) \ | |
11199 | + ST(i+3,3) \ | |
11200 | + | |
11201 | + | |
11202 | + PF0(0) | |
11203 | + PF0(2) | |
11204 | + | |
11205 | + " .align 32,0x90 ;\n" | |
11206 | + " 1: ;\n" | |
11207 | + | |
11208 | + BLOCK(0) | |
11209 | + BLOCK(4) | |
11210 | + BLOCK(8) | |
11211 | + BLOCK(12) | |
11212 | + | |
11213 | + " addl $256, %1 ;\n" | |
11214 | + " addl $256, %2 ;\n" | |
11215 | + " addl $256, %3 ;\n" | |
11216 | + " addl $256, %4 ;\n" | |
11217 | + " addl $256, %5 ;\n" | |
11218 | + " decl %0 ;\n" | |
11219 | + " jnz 1b ;\n" | |
11220 | + | |
11221 | + : | |
11222 | + : "r" (lines), | |
11223 | + "r" (bh_ptr[0]->b_data), | |
11224 | + "r" (bh_ptr[1]->b_data), | |
11225 | + "r" (bh_ptr[2]->b_data), | |
11226 | + "r" (bh_ptr[3]->b_data), | |
11227 | + "r" (bh_ptr[4]->b_data) | |
11228 | + : "memory"); | |
11229 | + break; | |
11230 | + } | |
11231 | + | |
11232 | + __asm__ __volatile__ ( | |
11233 | + "sfence ;\n\t" | |
11234 | + "movups (%1),%%xmm0 ;\n\t" | |
11235 | + "movups 0x10(%1),%%xmm1 ;\n\t" | |
11236 | + "movups 0x20(%1),%%xmm2 ;\n\t" | |
11237 | + "movups 0x30(%1),%%xmm3 ;\n\t" | |
11238 | + "movl %0,%%cr0 ;\n\t" | |
11239 | + : | |
11240 | + : "r" (cr0), "r" (xmm_save) | |
11241 | + : "memory" ); | |
11242 | +} | |
11243 | + | |
11244 | +#undef OFFS | |
11245 | +#undef LD | |
11246 | +#undef ST | |
11247 | +#undef PF0 | |
11248 | +#undef PF1 | |
11249 | +#undef PF2 | |
11250 | +#undef PF3 | |
11251 | +#undef PF4 | |
11252 | +#undef PF5 | |
11253 | +#undef XO1 | |
11254 | +#undef XO2 | |
11255 | +#undef XO3 | |
11256 | +#undef XO4 | |
11257 | +#undef XO5 | |
11258 | +#undef BLOCK | |
11259 | + | |
11260 | +#endif /* CONFIG_X86_XMM */ | |
11261 | + | |
11262 | +/* | |
11263 | + * high-speed RAID5 checksumming functions utilizing MMX instructions | |
11264 | + * Copyright (C) 1998 Ingo Molnar | |
11265 | + */ | |
11266 | +XORBLOCK_TEMPLATE(pII_mmx) | |
11267 | +{ | |
11268 | + char fpu_save[108]; | |
11269 | + int lines = (bh_ptr[0]->b_size>>7); | |
11270 | + | |
11271 | + if (!(current->flags & PF_USEDFPU)) | |
11272 | + __asm__ __volatile__ ( " clts;\n"); | |
11273 | + | |
11274 | + __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); | |
11275 | + | |
11276 | +#define LD(x,y) \ | |
11277 | + " movq 8*("#x")(%1), %%mm"#y" ;\n" | |
11278 | +#define ST(x,y) \ | |
11279 | + " movq %%mm"#y", 8*("#x")(%1) ;\n" | |
11280 | +#define XO1(x,y) \ | |
11281 | + " pxor 8*("#x")(%2), %%mm"#y" ;\n" | |
11282 | +#define XO2(x,y) \ | |
11283 | + " pxor 8*("#x")(%3), %%mm"#y" ;\n" | |
11284 | +#define XO3(x,y) \ | |
11285 | + " pxor 8*("#x")(%4), %%mm"#y" ;\n" | |
11286 | +#define XO4(x,y) \ | |
11287 | + " pxor 8*("#x")(%5), %%mm"#y" ;\n" | |
11288 | + | |
11289 | + switch(count) { | |
11290 | + case 2: | |
11291 | + __asm__ __volatile__ ( | |
11292 | +#undef BLOCK | |
11293 | +#define BLOCK(i) \ | |
11294 | + LD(i,0) \ | |
11295 | + LD(i+1,1) \ | |
11296 | + LD(i+2,2) \ | |
11297 | + LD(i+3,3) \ | |
11298 | + XO1(i,0) \ | |
11299 | + ST(i,0) \ | |
11300 | + XO1(i+1,1) \ | |
11301 | + ST(i+1,1) \ | |
11302 | + XO1(i+2,2) \ | |
11303 | + ST(i+2,2) \ | |
11304 | + XO1(i+3,3) \ | |
11305 | + ST(i+3,3) | |
11306 | + | |
11307 | + " .align 32,0x90 ;\n" | |
11308 | + " 1: ;\n" | |
11309 | + | |
11310 | + BLOCK(0) | |
11311 | + BLOCK(4) | |
11312 | + BLOCK(8) | |
11313 | + BLOCK(12) | |
11314 | + | |
11315 | + " addl $128, %1 ;\n" | |
11316 | + " addl $128, %2 ;\n" | |
11317 | + " decl %0 ;\n" | |
11318 | + " jnz 1b ;\n" | |
11319 | + : | |
11320 | + : "r" (lines), | |
11321 | + "r" (bh_ptr[0]->b_data), | |
11322 | + "r" (bh_ptr[1]->b_data) | |
11323 | + : "memory"); | |
11324 | + break; | |
11325 | + case 3: | |
11326 | + __asm__ __volatile__ ( | |
11327 | +#undef BLOCK | |
11328 | +#define BLOCK(i) \ | |
11329 | + LD(i,0) \ | |
11330 | + LD(i+1,1) \ | |
11331 | + LD(i+2,2) \ | |
11332 | + LD(i+3,3) \ | |
11333 | + XO1(i,0) \ | |
11334 | + XO1(i+1,1) \ | |
11335 | + XO1(i+2,2) \ | |
11336 | + XO1(i+3,3) \ | |
11337 | + XO2(i,0) \ | |
11338 | + ST(i,0) \ | |
11339 | + XO2(i+1,1) \ | |
11340 | + ST(i+1,1) \ | |
11341 | + XO2(i+2,2) \ | |
11342 | + ST(i+2,2) \ | |
11343 | + XO2(i+3,3) \ | |
11344 | + ST(i+3,3) | |
11345 | + | |
11346 | + " .align 32,0x90 ;\n" | |
11347 | + " 1: ;\n" | |
11348 | + | |
11349 | + BLOCK(0) | |
11350 | + BLOCK(4) | |
11351 | + BLOCK(8) | |
11352 | + BLOCK(12) | |
11353 | + | |
11354 | + " addl $128, %1 ;\n" | |
11355 | + " addl $128, %2 ;\n" | |
11356 | + " addl $128, %3 ;\n" | |
11357 | + " decl %0 ;\n" | |
11358 | + " jnz 1b ;\n" | |
11359 | + : | |
11360 | + : "r" (lines), | |
11361 | + "r" (bh_ptr[0]->b_data), | |
11362 | + "r" (bh_ptr[1]->b_data), | |
11363 | + "r" (bh_ptr[2]->b_data) | |
11364 | + : "memory"); | |
11365 | + break; | |
11366 | + case 4: | |
11367 | + __asm__ __volatile__ ( | |
11368 | +#undef BLOCK | |
11369 | +#define BLOCK(i) \ | |
11370 | + LD(i,0) \ | |
11371 | + LD(i+1,1) \ | |
11372 | + LD(i+2,2) \ | |
11373 | + LD(i+3,3) \ | |
11374 | + XO1(i,0) \ | |
11375 | + XO1(i+1,1) \ | |
11376 | + XO1(i+2,2) \ | |
11377 | + XO1(i+3,3) \ | |
11378 | + XO2(i,0) \ | |
11379 | + XO2(i+1,1) \ | |
11380 | + XO2(i+2,2) \ | |
11381 | + XO2(i+3,3) \ | |
11382 | + XO3(i,0) \ | |
11383 | + ST(i,0) \ | |
11384 | + XO3(i+1,1) \ | |
11385 | + ST(i+1,1) \ | |
11386 | + XO3(i+2,2) \ | |
11387 | + ST(i+2,2) \ | |
11388 | + XO3(i+3,3) \ | |
11389 | + ST(i+3,3) | |
11390 | + | |
11391 | + " .align 32,0x90 ;\n" | |
11392 | + " 1: ;\n" | |
11393 | + | |
11394 | + BLOCK(0) | |
11395 | + BLOCK(4) | |
11396 | + BLOCK(8) | |
11397 | + BLOCK(12) | |
11398 | + | |
11399 | + " addl $128, %1 ;\n" | |
11400 | + " addl $128, %2 ;\n" | |
11401 | + " addl $128, %3 ;\n" | |
11402 | + " addl $128, %4 ;\n" | |
11403 | + " decl %0 ;\n" | |
11404 | + " jnz 1b ;\n" | |
11405 | + : | |
11406 | + : "r" (lines), | |
11407 | + "r" (bh_ptr[0]->b_data), | |
11408 | + "r" (bh_ptr[1]->b_data), | |
11409 | + "r" (bh_ptr[2]->b_data), | |
11410 | + "r" (bh_ptr[3]->b_data) | |
11411 | + : "memory"); | |
11412 | + break; | |
11413 | + case 5: | |
11414 | + __asm__ __volatile__ ( | |
11415 | +#undef BLOCK | |
11416 | +#define BLOCK(i) \ | |
11417 | + LD(i,0) \ | |
11418 | + LD(i+1,1) \ | |
11419 | + LD(i+2,2) \ | |
11420 | + LD(i+3,3) \ | |
11421 | + XO1(i,0) \ | |
11422 | + XO1(i+1,1) \ | |
11423 | + XO1(i+2,2) \ | |
11424 | + XO1(i+3,3) \ | |
11425 | + XO2(i,0) \ | |
11426 | + XO2(i+1,1) \ | |
11427 | + XO2(i+2,2) \ | |
11428 | + XO2(i+3,3) \ | |
11429 | + XO3(i,0) \ | |
11430 | + XO3(i+1,1) \ | |
11431 | + XO3(i+2,2) \ | |
11432 | + XO3(i+3,3) \ | |
11433 | + XO4(i,0) \ | |
11434 | + ST(i,0) \ | |
11435 | + XO4(i+1,1) \ | |
11436 | + ST(i+1,1) \ | |
11437 | + XO4(i+2,2) \ | |
11438 | + ST(i+2,2) \ | |
11439 | + XO4(i+3,3) \ | |
11440 | + ST(i+3,3) | |
11441 | + | |
11442 | + " .align 32,0x90 ;\n" | |
11443 | + " 1: ;\n" | |
11444 | + | |
11445 | + BLOCK(0) | |
11446 | + BLOCK(4) | |
11447 | + BLOCK(8) | |
11448 | + BLOCK(12) | |
11449 | + | |
11450 | + " addl $128, %1 ;\n" | |
11451 | + " addl $128, %2 ;\n" | |
11452 | + " addl $128, %3 ;\n" | |
11453 | + " addl $128, %4 ;\n" | |
11454 | + " addl $128, %5 ;\n" | |
11455 | + " decl %0 ;\n" | |
11456 | + " jnz 1b ;\n" | |
11457 | + : | |
11458 | + : "r" (lines), | |
11459 | + "r" (bh_ptr[0]->b_data), | |
11460 | + "r" (bh_ptr[1]->b_data), | |
11461 | + "r" (bh_ptr[2]->b_data), | |
11462 | + "r" (bh_ptr[3]->b_data), | |
11463 | + "r" (bh_ptr[4]->b_data) | |
11464 | + : "memory"); | |
11465 | + break; | |
11466 | + } | |
11467 | + | |
11468 | + __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); | |
11469 | + | |
11470 | + if (!(current->flags & PF_USEDFPU)) | |
11471 | + stts(); | |
11472 | +} | |
11473 | + | |
11474 | +#undef LD | |
11475 | +#undef XO1 | |
11476 | +#undef XO2 | |
11477 | +#undef XO3 | |
11478 | +#undef XO4 | |
11479 | +#undef ST | |
11480 | +#undef BLOCK | |
11481 | + | |
11482 | +XORBLOCK_TEMPLATE(p5_mmx) | |
11483 | +{ | |
11484 | + char fpu_save[108]; | |
11485 | + int lines = (bh_ptr[0]->b_size>>6); | |
11486 | + | |
11487 | + if (!(current->flags & PF_USEDFPU)) | |
11488 | + __asm__ __volatile__ ( " clts;\n"); | |
11489 | + | |
11490 | + __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); | |
11491 | + | |
11492 | + switch(count) { | |
11493 | + case 2: | |
11494 | + __asm__ __volatile__ ( | |
11495 | + | |
11496 | + " .align 32,0x90 ;\n" | |
11497 | + " 1: ;\n" | |
11498 | + " movq (%1), %%mm0 ;\n" | |
11499 | + " movq 8(%1), %%mm1 ;\n" | |
11500 | + " pxor (%2), %%mm0 ;\n" | |
11501 | + " movq 16(%1), %%mm2 ;\n" | |
11502 | + " movq %%mm0, (%1) ;\n" | |
11503 | + " pxor 8(%2), %%mm1 ;\n" | |
11504 | + " movq 24(%1), %%mm3 ;\n" | |
11505 | + " movq %%mm1, 8(%1) ;\n" | |
11506 | + " pxor 16(%2), %%mm2 ;\n" | |
11507 | + " movq 32(%1), %%mm4 ;\n" | |
11508 | + " movq %%mm2, 16(%1) ;\n" | |
11509 | + " pxor 24(%2), %%mm3 ;\n" | |
11510 | + " movq 40(%1), %%mm5 ;\n" | |
11511 | + " movq %%mm3, 24(%1) ;\n" | |
11512 | + " pxor 32(%2), %%mm4 ;\n" | |
11513 | + " movq 48(%1), %%mm6 ;\n" | |
11514 | + " movq %%mm4, 32(%1) ;\n" | |
11515 | + " pxor 40(%2), %%mm5 ;\n" | |
11516 | + " movq 56(%1), %%mm7 ;\n" | |
11517 | + " movq %%mm5, 40(%1) ;\n" | |
11518 | + " pxor 48(%2), %%mm6 ;\n" | |
11519 | + " pxor 56(%2), %%mm7 ;\n" | |
11520 | + " movq %%mm6, 48(%1) ;\n" | |
11521 | + " movq %%mm7, 56(%1) ;\n" | |
11522 | + | |
11523 | + " addl $64, %1 ;\n" | |
11524 | + " addl $64, %2 ;\n" | |
11525 | + " decl %0 ;\n" | |
11526 | + " jnz 1b ;\n" | |
11527 | + | |
11528 | + : | |
11529 | + : "r" (lines), | |
11530 | + "r" (bh_ptr[0]->b_data), | |
11531 | + "r" (bh_ptr[1]->b_data) | |
11532 | + : "memory" ); | |
11533 | + break; | |
11534 | + case 3: | |
11535 | + __asm__ __volatile__ ( | |
11536 | + | |
11537 | + " .align 32,0x90 ;\n" | |
11538 | + " 1: ;\n" | |
11539 | + " movq (%1), %%mm0 ;\n" | |
11540 | + " movq 8(%1), %%mm1 ;\n" | |
11541 | + " pxor (%2), %%mm0 ;\n" | |
11542 | + " movq 16(%1), %%mm2 ;\n" | |
11543 | + " pxor 8(%2), %%mm1 ;\n" | |
11544 | + " pxor (%3), %%mm0 ;\n" | |
11545 | + " pxor 16(%2), %%mm2 ;\n" | |
11546 | + " movq %%mm0, (%1) ;\n" | |
11547 | + " pxor 8(%3), %%mm1 ;\n" | |
11548 | + " pxor 16(%3), %%mm2 ;\n" | |
11549 | + " movq 24(%1), %%mm3 ;\n" | |
11550 | + " movq %%mm1, 8(%1) ;\n" | |
11551 | + " movq 32(%1), %%mm4 ;\n" | |
11552 | + " movq 40(%1), %%mm5 ;\n" | |
11553 | + " pxor 24(%2), %%mm3 ;\n" | |
11554 | + " movq %%mm2, 16(%1) ;\n" | |
11555 | + " pxor 32(%2), %%mm4 ;\n" | |
11556 | + " pxor 24(%3), %%mm3 ;\n" | |
11557 | + " pxor 40(%2), %%mm5 ;\n" | |
11558 | + " movq %%mm3, 24(%1) ;\n" | |
11559 | + " pxor 32(%3), %%mm4 ;\n" | |
11560 | + " pxor 40(%3), %%mm5 ;\n" | |
11561 | + " movq 48(%1), %%mm6 ;\n" | |
11562 | + " movq %%mm4, 32(%1) ;\n" | |
11563 | + " movq 56(%1), %%mm7 ;\n" | |
11564 | + " pxor 48(%2), %%mm6 ;\n" | |
11565 | + " movq %%mm5, 40(%1) ;\n" | |
11566 | + " pxor 56(%2), %%mm7 ;\n" | |
11567 | + " pxor 48(%3), %%mm6 ;\n" | |
11568 | + " pxor 56(%3), %%mm7 ;\n" | |
11569 | + " movq %%mm6, 48(%1) ;\n" | |
11570 | + " movq %%mm7, 56(%1) ;\n" | |
11571 | + | |
11572 | + " addl $64, %1 ;\n" | |
11573 | + " addl $64, %2 ;\n" | |
11574 | + " addl $64, %3 ;\n" | |
11575 | + " decl %0 ;\n" | |
11576 | + " jnz 1b ;\n" | |
11577 | + | |
11578 | + : | |
11579 | + : "r" (lines), | |
11580 | + "r" (bh_ptr[0]->b_data), | |
11581 | + "r" (bh_ptr[1]->b_data), | |
11582 | + "r" (bh_ptr[2]->b_data) | |
11583 | + : "memory" ); | |
11584 | + break; | |
11585 | + case 4: | |
11586 | + __asm__ __volatile__ ( | |
11587 | + | |
11588 | + " .align 32,0x90 ;\n" | |
11589 | + " 1: ;\n" | |
11590 | + " movq (%1), %%mm0 ;\n" | |
11591 | + " movq 8(%1), %%mm1 ;\n" | |
11592 | + " pxor (%2), %%mm0 ;\n" | |
11593 | + " movq 16(%1), %%mm2 ;\n" | |
11594 | + " pxor 8(%2), %%mm1 ;\n" | |
11595 | + " pxor (%3), %%mm0 ;\n" | |
11596 | + " pxor 16(%2), %%mm2 ;\n" | |
11597 | + " pxor 8(%3), %%mm1 ;\n" | |
11598 | + " pxor (%4), %%mm0 ;\n" | |
11599 | + " movq 24(%1), %%mm3 ;\n" | |
11600 | + " pxor 16(%3), %%mm2 ;\n" | |
11601 | + " pxor 8(%4), %%mm1 ;\n" | |
11602 | + " movq %%mm0, (%1) ;\n" | |
11603 | + " movq 32(%1), %%mm4 ;\n" | |
11604 | + " pxor 24(%2), %%mm3 ;\n" | |
11605 | + " pxor 16(%4), %%mm2 ;\n" | |
11606 | + " movq %%mm1, 8(%1) ;\n" | |
11607 | + " movq 40(%1), %%mm5 ;\n" | |
11608 | + " pxor 32(%2), %%mm4 ;\n" | |
11609 | + " pxor 24(%3), %%mm3 ;\n" | |
11610 | + " movq %%mm2, 16(%1) ;\n" | |
11611 | + " pxor 40(%2), %%mm5 ;\n" | |
11612 | + " pxor 32(%3), %%mm4 ;\n" | |
11613 | + " pxor 24(%4), %%mm3 ;\n" | |
11614 | + " movq %%mm3, 24(%1) ;\n" | |
11615 | + " movq 56(%1), %%mm7 ;\n" | |
11616 | + " movq 48(%1), %%mm6 ;\n" | |
11617 | + " pxor 40(%3), %%mm5 ;\n" | |
11618 | + " pxor 32(%4), %%mm4 ;\n" | |
11619 | + " pxor 48(%2), %%mm6 ;\n" | |
11620 | + " movq %%mm4, 32(%1) ;\n" | |
11621 | + " pxor 56(%2), %%mm7 ;\n" | |
11622 | + " pxor 40(%4), %%mm5 ;\n" | |
11623 | + " pxor 48(%3), %%mm6 ;\n" | |
11624 | + " pxor 56(%3), %%mm7 ;\n" | |
11625 | + " movq %%mm5, 40(%1) ;\n" | |
11626 | + " pxor 48(%4), %%mm6 ;\n" | |
11627 | + " pxor 56(%4), %%mm7 ;\n" | |
11628 | + " movq %%mm6, 48(%1) ;\n" | |
11629 | + " movq %%mm7, 56(%1) ;\n" | |
11630 | + | |
11631 | + " addl $64, %1 ;\n" | |
11632 | + " addl $64, %2 ;\n" | |
11633 | + " addl $64, %3 ;\n" | |
11634 | + " addl $64, %4 ;\n" | |
11635 | + " decl %0 ;\n" | |
11636 | + " jnz 1b ;\n" | |
11637 | + | |
11638 | + : | |
11639 | + : "r" (lines), | |
11640 | + "r" (bh_ptr[0]->b_data), | |
11641 | + "r" (bh_ptr[1]->b_data), | |
11642 | + "r" (bh_ptr[2]->b_data), | |
11643 | + "r" (bh_ptr[3]->b_data) | |
11644 | + : "memory" ); | |
11645 | + break; | |
11646 | + case 5: | |
11647 | + __asm__ __volatile__ ( | |
11648 | + | |
11649 | + " .align 32,0x90 ;\n" | |
11650 | + " 1: ;\n" | |
11651 | + " movq (%1), %%mm0 ;\n" | |
11652 | + " movq 8(%1), %%mm1 ;\n" | |
11653 | + " pxor (%2), %%mm0 ;\n" | |
11654 | + " pxor 8(%2), %%mm1 ;\n" | |
11655 | + " movq 16(%1), %%mm2 ;\n" | |
11656 | + " pxor (%3), %%mm0 ;\n" | |
11657 | + " pxor 8(%3), %%mm1 ;\n" | |
11658 | + " pxor 16(%2), %%mm2 ;\n" | |
11659 | + " pxor (%4), %%mm0 ;\n" | |
11660 | + " pxor 8(%4), %%mm1 ;\n" | |
11661 | + " pxor 16(%3), %%mm2 ;\n" | |
11662 | + " movq 24(%1), %%mm3 ;\n" | |
11663 | + " pxor (%5), %%mm0 ;\n" | |
11664 | + " pxor 8(%5), %%mm1 ;\n" | |
11665 | + " movq %%mm0, (%1) ;\n" | |
11666 | + " pxor 16(%4), %%mm2 ;\n" | |
11667 | + " pxor 24(%2), %%mm3 ;\n" | |
11668 | + " movq %%mm1, 8(%1) ;\n" | |
11669 | + " pxor 16(%5), %%mm2 ;\n" | |
11670 | + " pxor 24(%3), %%mm3 ;\n" | |
11671 | + " movq 32(%1), %%mm4 ;\n" | |
11672 | + " movq %%mm2, 16(%1) ;\n" | |
11673 | + " pxor 24(%4), %%mm3 ;\n" | |
11674 | + " pxor 32(%2), %%mm4 ;\n" | |
11675 | + " movq 40(%1), %%mm5 ;\n" | |
11676 | + " pxor 24(%5), %%mm3 ;\n" | |
11677 | + " pxor 32(%3), %%mm4 ;\n" | |
11678 | + " pxor 40(%2), %%mm5 ;\n" | |
11679 | + " movq %%mm3, 24(%1) ;\n" | |
11680 | + " pxor 32(%4), %%mm4 ;\n" | |
11681 | + " pxor 40(%3), %%mm5 ;\n" | |
11682 | + " movq 48(%1), %%mm6 ;\n" | |
11683 | + " movq 56(%1), %%mm7 ;\n" | |
11684 | + " pxor 32(%5), %%mm4 ;\n" | |
11685 | + " pxor 40(%4), %%mm5 ;\n" | |
11686 | + " pxor 48(%2), %%mm6 ;\n" | |
11687 | + " pxor 56(%2), %%mm7 ;\n" | |
11688 | + " movq %%mm4, 32(%1) ;\n" | |
11689 | + " pxor 48(%3), %%mm6 ;\n" | |
11690 | + " pxor 56(%3), %%mm7 ;\n" | |
11691 | + " pxor 40(%5), %%mm5 ;\n" | |
11692 | + " pxor 48(%4), %%mm6 ;\n" | |
11693 | + " pxor 56(%4), %%mm7 ;\n" | |
11694 | + " movq %%mm5, 40(%1) ;\n" | |
11695 | + " pxor 48(%5), %%mm6 ;\n" | |
11696 | + " pxor 56(%5), %%mm7 ;\n" | |
11697 | + " movq %%mm6, 48(%1) ;\n" | |
11698 | + " movq %%mm7, 56(%1) ;\n" | |
11699 | + | |
11700 | + " addl $64, %1 ;\n" | |
11701 | + " addl $64, %2 ;\n" | |
11702 | + " addl $64, %3 ;\n" | |
11703 | + " addl $64, %4 ;\n" | |
11704 | + " addl $64, %5 ;\n" | |
11705 | + " decl %0 ;\n" | |
11706 | + " jnz 1b ;\n" | |
11707 | + | |
11708 | + : | |
11709 | + : "r" (lines), | |
11710 | + "r" (bh_ptr[0]->b_data), | |
11711 | + "r" (bh_ptr[1]->b_data), | |
11712 | + "r" (bh_ptr[2]->b_data), | |
11713 | + "r" (bh_ptr[3]->b_data), | |
11714 | + "r" (bh_ptr[4]->b_data) | |
11715 | + : "memory" ); | |
11716 | + break; | |
11717 | + } | |
11718 | + | |
11719 | + __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); | |
11720 | + | |
11721 | + if (!(current->flags & PF_USEDFPU)) | |
11722 | + stts(); | |
11723 | +} | |
11724 | +#endif /* __i386__ */ | |
11725 | +#endif /* !__sparc_v9__ */ | |
11726 | + | |
11727 | +#ifdef __sparc_v9__ | |
11728 | +/* | |
11729 | + * High speed xor_block operation for RAID4/5 utilizing the | |
11730 | + * UltraSparc Visual Instruction Set. | |
11731 | + * | |
11732 | + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) | |
11733 | + * | |
11734 | + * Requirements: | |
11735 | + * !(((long)dest | (long)sourceN) & (64 - 1)) && | |
11736 | + * !(len & 127) && len >= 256 | |
11737 | + * | |
11738 | + * It is done in pure assembly, as otherwise gcc makes it | |
11739 | + * a non-leaf function, which is not what we want. | |
11740 | + * Also, we don't measure the speeds as on other architectures, | |
11741 | + * as the measuring routine does not take into account cold caches | |
11742 | + * and the fact that xor_block_VIS bypasses the caches. | |
11743 | + * xor_block_32regs might be 5% faster for count 2 if caches are hot | |
11744 | + * and things just right (for count 3 VIS is about as fast as 32regs for | |
11745 | + * hot caches and for count 4 and 5 VIS is faster by good margin always), | |
11746 | + * but I think it is better not to pollute the caches. | |
11747 | + * Actually, if I'd just fight for speed for hot caches, I could | |
11748 | + * write a hybrid VIS/integer routine, which would do always two | |
11749 | + * 64B blocks in VIS and two in IEUs, but I really care more about | |
11750 | + * caches. | |
11751 | + */ | |
11752 | +extern void *VISenter(void); | |
11753 | +extern void xor_block_VIS XOR_ARGS; | |
11754 | + | |
11755 | +void __xor_block_VIS(void) | |
11756 | +{ | |
11757 | +__asm__ (" | |
11758 | + .globl xor_block_VIS | |
11759 | +xor_block_VIS: | |
11760 | + ldx [%%o1 + 0], %%o4 | |
11761 | + ldx [%%o1 + 8], %%o3 | |
11762 | + ldx [%%o4 + %1], %%g5 | |
11763 | + ldx [%%o4 + %0], %%o4 | |
11764 | + ldx [%%o3 + %0], %%o3 | |
11765 | + rd %%fprs, %%o5 | |
11766 | + andcc %%o5, %2, %%g0 | |
11767 | + be,pt %%icc, 297f | |
11768 | + sethi %%hi(%5), %%g1 | |
11769 | + jmpl %%g1 + %%lo(%5), %%g7 | |
11770 | + add %%g7, 8, %%g7 | |
11771 | +297: wr %%g0, %4, %%fprs | |
11772 | + membar #LoadStore|#StoreLoad|#StoreStore | |
11773 | + sub %%g5, 64, %%g5 | |
11774 | + ldda [%%o4] %3, %%f0 | |
11775 | + ldda [%%o3] %3, %%f16 | |
11776 | + cmp %%o0, 4 | |
11777 | + bgeu,pt %%xcc, 10f | |
11778 | + cmp %%o0, 3 | |
11779 | + be,pn %%xcc, 13f | |
11780 | + mov -64, %%g1 | |
11781 | + sub %%g5, 64, %%g5 | |
11782 | + rd %%asi, %%g1 | |
11783 | + wr %%g0, %3, %%asi | |
11784 | + | |
11785 | +2: ldda [%%o4 + 64] %%asi, %%f32 | |
11786 | + fxor %%f0, %%f16, %%f16 | |
11787 | + fxor %%f2, %%f18, %%f18 | |
11788 | + fxor %%f4, %%f20, %%f20 | |
11789 | + fxor %%f6, %%f22, %%f22 | |
11790 | + fxor %%f8, %%f24, %%f24 | |
11791 | + fxor %%f10, %%f26, %%f26 | |
11792 | + fxor %%f12, %%f28, %%f28 | |
11793 | + fxor %%f14, %%f30, %%f30 | |
11794 | + stda %%f16, [%%o4] %3 | |
11795 | + ldda [%%o3 + 64] %%asi, %%f48 | |
11796 | + ldda [%%o4 + 128] %%asi, %%f0 | |
11797 | + fxor %%f32, %%f48, %%f48 | |
11798 | + fxor %%f34, %%f50, %%f50 | |
11799 | + add %%o4, 128, %%o4 | |
11800 | + fxor %%f36, %%f52, %%f52 | |
11801 | + add %%o3, 128, %%o3 | |
11802 | + fxor %%f38, %%f54, %%f54 | |
11803 | + subcc %%g5, 128, %%g5 | |
11804 | + fxor %%f40, %%f56, %%f56 | |
11805 | + fxor %%f42, %%f58, %%f58 | |
11806 | + fxor %%f44, %%f60, %%f60 | |
11807 | + fxor %%f46, %%f62, %%f62 | |
11808 | + stda %%f48, [%%o4 - 64] %%asi | |
11809 | + bne,pt %%xcc, 2b | |
11810 | + ldda [%%o3] %3, %%f16 | |
11811 | + | |
11812 | + ldda [%%o4 + 64] %%asi, %%f32 | |
11813 | + fxor %%f0, %%f16, %%f16 | |
11814 | + fxor %%f2, %%f18, %%f18 | |
11815 | + fxor %%f4, %%f20, %%f20 | |
11816 | + fxor %%f6, %%f22, %%f22 | |
11817 | + fxor %%f8, %%f24, %%f24 | |
11818 | + fxor %%f10, %%f26, %%f26 | |
11819 | + fxor %%f12, %%f28, %%f28 | |
11820 | + fxor %%f14, %%f30, %%f30 | |
11821 | + stda %%f16, [%%o4] %3 | |
11822 | + ldda [%%o3 + 64] %%asi, %%f48 | |
11823 | + membar #Sync | |
11824 | + fxor %%f32, %%f48, %%f48 | |
11825 | + fxor %%f34, %%f50, %%f50 | |
11826 | + fxor %%f36, %%f52, %%f52 | |
11827 | + fxor %%f38, %%f54, %%f54 | |
11828 | + fxor %%f40, %%f56, %%f56 | |
11829 | + fxor %%f42, %%f58, %%f58 | |
11830 | + fxor %%f44, %%f60, %%f60 | |
11831 | + fxor %%f46, %%f62, %%f62 | |
11832 | + stda %%f48, [%%o4 + 64] %%asi | |
11833 | + membar #Sync|#StoreStore|#StoreLoad | |
11834 | + wr %%g0, 0, %%fprs | |
11835 | + retl | |
11836 | + wr %%g1, %%g0, %%asi | |
11837 | + | |
11838 | +13: ldx [%%o1 + 16], %%o2 | |
11839 | + ldx [%%o2 + %0], %%o2 | |
11840 | + | |
11841 | +3: ldda [%%o2] %3, %%f32 | |
11842 | + fxor %%f0, %%f16, %%f48 | |
11843 | + fxor %%f2, %%f18, %%f50 | |
11844 | + add %%o4, 64, %%o4 | |
11845 | + fxor %%f4, %%f20, %%f52 | |
11846 | + fxor %%f6, %%f22, %%f54 | |
11847 | + add %%o3, 64, %%o3 | |
11848 | + fxor %%f8, %%f24, %%f56 | |
11849 | + fxor %%f10, %%f26, %%f58 | |
11850 | + fxor %%f12, %%f28, %%f60 | |
11851 | + fxor %%f14, %%f30, %%f62 | |
11852 | + ldda [%%o4] %3, %%f0 | |
11853 | + fxor %%f48, %%f32, %%f48 | |
11854 | + fxor %%f50, %%f34, %%f50 | |
11855 | + fxor %%f52, %%f36, %%f52 | |
11856 | + fxor %%f54, %%f38, %%f54 | |
11857 | + add %%o2, 64, %%o2 | |
11858 | + fxor %%f56, %%f40, %%f56 | |
11859 | + fxor %%f58, %%f42, %%f58 | |
11860 | + subcc %%g5, 64, %%g5 | |
11861 | + fxor %%f60, %%f44, %%f60 | |
11862 | + fxor %%f62, %%f46, %%f62 | |
11863 | + stda %%f48, [%%o4 + %%g1] %3 | |
11864 | + bne,pt %%xcc, 3b | |
11865 | + ldda [%%o3] %3, %%f16 | |
11866 | + | |
11867 | + ldda [%%o2] %3, %%f32 | |
11868 | + fxor %%f0, %%f16, %%f48 | |
11869 | + fxor %%f2, %%f18, %%f50 | |
11870 | + fxor %%f4, %%f20, %%f52 | |
11871 | + fxor %%f6, %%f22, %%f54 | |
11872 | + fxor %%f8, %%f24, %%f56 | |
11873 | + fxor %%f10, %%f26, %%f58 | |
11874 | + fxor %%f12, %%f28, %%f60 | |
11875 | + fxor %%f14, %%f30, %%f62 | |
11876 | + membar #Sync | |
11877 | + fxor %%f48, %%f32, %%f48 | |
11878 | + fxor %%f50, %%f34, %%f50 | |
11879 | + fxor %%f52, %%f36, %%f52 | |
11880 | + fxor %%f54, %%f38, %%f54 | |
11881 | + fxor %%f56, %%f40, %%f56 | |
11882 | + fxor %%f58, %%f42, %%f58 | |
11883 | + fxor %%f60, %%f44, %%f60 | |
11884 | + fxor %%f62, %%f46, %%f62 | |
11885 | + stda %%f48, [%%o4] %3 | |
11886 | + membar #Sync|#StoreStore|#StoreLoad | |
11887 | + retl | |
11888 | + wr %%g0, 0, %%fprs | |
11889 | + | |
11890 | +10: cmp %%o0, 5 | |
11891 | + be,pt %%xcc, 15f | |
11892 | + mov -64, %%g1 | |
11893 | + | |
11894 | +14: ldx [%%o1 + 16], %%o2 | |
11895 | + ldx [%%o1 + 24], %%o0 | |
11896 | + ldx [%%o2 + %0], %%o2 | |
11897 | + ldx [%%o0 + %0], %%o0 | |
11898 | + | |
11899 | +4: ldda [%%o2] %3, %%f32 | |
11900 | + fxor %%f0, %%f16, %%f16 | |
11901 | + fxor %%f2, %%f18, %%f18 | |
11902 | + add %%o4, 64, %%o4 | |
11903 | + fxor %%f4, %%f20, %%f20 | |
11904 | + fxor %%f6, %%f22, %%f22 | |
11905 | + add %%o3, 64, %%o3 | |
11906 | + fxor %%f8, %%f24, %%f24 | |
11907 | + fxor %%f10, %%f26, %%f26 | |
11908 | + fxor %%f12, %%f28, %%f28 | |
11909 | + fxor %%f14, %%f30, %%f30 | |
11910 | + ldda [%%o0] %3, %%f48 | |
11911 | + fxor %%f16, %%f32, %%f32 | |
11912 | + fxor %%f18, %%f34, %%f34 | |
11913 | + fxor %%f20, %%f36, %%f36 | |
11914 | + fxor %%f22, %%f38, %%f38 | |
11915 | + add %%o2, 64, %%o2 | |
11916 | + fxor %%f24, %%f40, %%f40 | |
11917 | + fxor %%f26, %%f42, %%f42 | |
11918 | + fxor %%f28, %%f44, %%f44 | |
11919 | + fxor %%f30, %%f46, %%f46 | |
11920 | + ldda [%%o4] %3, %%f0 | |
11921 | + fxor %%f32, %%f48, %%f48 | |
11922 | + fxor %%f34, %%f50, %%f50 | |
11923 | + fxor %%f36, %%f52, %%f52 | |
11924 | + add %%o0, 64, %%o0 | |
11925 | + fxor %%f38, %%f54, %%f54 | |
11926 | + fxor %%f40, %%f56, %%f56 | |
11927 | + fxor %%f42, %%f58, %%f58 | |
11928 | + subcc %%g5, 64, %%g5 | |
11929 | + fxor %%f44, %%f60, %%f60 | |
11930 | + fxor %%f46, %%f62, %%f62 | |
11931 | + stda %%f48, [%%o4 + %%g1] %3 | |
11932 | + bne,pt %%xcc, 4b | |
11933 | + ldda [%%o3] %3, %%f16 | |
11934 | + | |
11935 | + ldda [%%o2] %3, %%f32 | |
11936 | + fxor %%f0, %%f16, %%f16 | |
11937 | + fxor %%f2, %%f18, %%f18 | |
11938 | + fxor %%f4, %%f20, %%f20 | |
11939 | + fxor %%f6, %%f22, %%f22 | |
11940 | + fxor %%f8, %%f24, %%f24 | |
11941 | + fxor %%f10, %%f26, %%f26 | |
11942 | + fxor %%f12, %%f28, %%f28 | |
11943 | + fxor %%f14, %%f30, %%f30 | |
11944 | + ldda [%%o0] %3, %%f48 | |
11945 | + fxor %%f16, %%f32, %%f32 | |
11946 | + fxor %%f18, %%f34, %%f34 | |
11947 | + fxor %%f20, %%f36, %%f36 | |
11948 | + fxor %%f22, %%f38, %%f38 | |
11949 | + fxor %%f24, %%f40, %%f40 | |
11950 | + fxor %%f26, %%f42, %%f42 | |
11951 | + fxor %%f28, %%f44, %%f44 | |
11952 | + fxor %%f30, %%f46, %%f46 | |
11953 | + membar #Sync | |
11954 | + fxor %%f32, %%f48, %%f48 | |
11955 | + fxor %%f34, %%f50, %%f50 | |
11956 | + fxor %%f36, %%f52, %%f52 | |
11957 | + fxor %%f38, %%f54, %%f54 | |
11958 | + fxor %%f40, %%f56, %%f56 | |
11959 | + fxor %%f42, %%f58, %%f58 | |
11960 | + fxor %%f44, %%f60, %%f60 | |
11961 | + fxor %%f46, %%f62, %%f62 | |
11962 | + stda %%f48, [%%o4] %3 | |
11963 | + membar #Sync|#StoreStore|#StoreLoad | |
11964 | + retl | |
11965 | + wr %%g0, 0, %%fprs | |
11966 | + | |
11967 | +15: ldx [%%o1 + 16], %%o2 | |
11968 | + ldx [%%o1 + 24], %%o0 | |
11969 | + ldx [%%o1 + 32], %%o1 | |
11970 | + ldx [%%o2 + %0], %%o2 | |
11971 | + ldx [%%o0 + %0], %%o0 | |
11972 | + ldx [%%o1 + %0], %%o1 | |
11973 | + | |
11974 | +5: ldda [%%o2] %3, %%f32 | |
11975 | + fxor %%f0, %%f16, %%f48 | |
11976 | + fxor %%f2, %%f18, %%f50 | |
11977 | + add %%o4, 64, %%o4 | |
11978 | + fxor %%f4, %%f20, %%f52 | |
11979 | + fxor %%f6, %%f22, %%f54 | |
11980 | + add %%o3, 64, %%o3 | |
11981 | + fxor %%f8, %%f24, %%f56 | |
11982 | + fxor %%f10, %%f26, %%f58 | |
11983 | + fxor %%f12, %%f28, %%f60 | |
11984 | + fxor %%f14, %%f30, %%f62 | |
11985 | + ldda [%%o0] %3, %%f16 | |
11986 | + fxor %%f48, %%f32, %%f48 | |
11987 | + fxor %%f50, %%f34, %%f50 | |
11988 | + fxor %%f52, %%f36, %%f52 | |
11989 | + fxor %%f54, %%f38, %%f54 | |
11990 | + add %%o2, 64, %%o2 | |
11991 | + fxor %%f56, %%f40, %%f56 | |
11992 | + fxor %%f58, %%f42, %%f58 | |
11993 | + fxor %%f60, %%f44, %%f60 | |
11994 | + fxor %%f62, %%f46, %%f62 | |
11995 | + ldda [%%o1] %3, %%f32 | |
11996 | + fxor %%f48, %%f16, %%f48 | |
11997 | + fxor %%f50, %%f18, %%f50 | |
11998 | + add %%o0, 64, %%o0 | |
11999 | + fxor %%f52, %%f20, %%f52 | |
12000 | + fxor %%f54, %%f22, %%f54 | |
12001 | + add %%o1, 64, %%o1 | |
12002 | + fxor %%f56, %%f24, %%f56 | |
12003 | + fxor %%f58, %%f26, %%f58 | |
12004 | + fxor %%f60, %%f28, %%f60 | |
12005 | + fxor %%f62, %%f30, %%f62 | |
12006 | + ldda [%%o4] %3, %%f0 | |
12007 | + fxor %%f48, %%f32, %%f48 | |
12008 | + fxor %%f50, %%f34, %%f50 | |
12009 | + fxor %%f52, %%f36, %%f52 | |
12010 | + fxor %%f54, %%f38, %%f54 | |
12011 | + fxor %%f56, %%f40, %%f56 | |
12012 | + fxor %%f58, %%f42, %%f58 | |
12013 | + subcc %%g5, 64, %%g5 | |
12014 | + fxor %%f60, %%f44, %%f60 | |
12015 | + fxor %%f62, %%f46, %%f62 | |
12016 | + stda %%f48, [%%o4 + %%g1] %3 | |
12017 | + bne,pt %%xcc, 5b | |
12018 | + ldda [%%o3] %3, %%f16 | |
12019 | + | |
12020 | + ldda [%%o2] %3, %%f32 | |
12021 | + fxor %%f0, %%f16, %%f48 | |
12022 | + fxor %%f2, %%f18, %%f50 | |
12023 | + fxor %%f4, %%f20, %%f52 | |
12024 | + fxor %%f6, %%f22, %%f54 | |
12025 | + fxor %%f8, %%f24, %%f56 | |
12026 | + fxor %%f10, %%f26, %%f58 | |
12027 | + fxor %%f12, %%f28, %%f60 | |
12028 | + fxor %%f14, %%f30, %%f62 | |
12029 | + ldda [%%o0] %3, %%f16 | |
12030 | + fxor %%f48, %%f32, %%f48 | |
12031 | + fxor %%f50, %%f34, %%f50 | |
12032 | + fxor %%f52, %%f36, %%f52 | |
12033 | + fxor %%f54, %%f38, %%f54 | |
12034 | + fxor %%f56, %%f40, %%f56 | |
12035 | + fxor %%f58, %%f42, %%f58 | |
12036 | + fxor %%f60, %%f44, %%f60 | |
12037 | + fxor %%f62, %%f46, %%f62 | |
12038 | + ldda [%%o1] %3, %%f32 | |
12039 | + fxor %%f48, %%f16, %%f48 | |
12040 | + fxor %%f50, %%f18, %%f50 | |
12041 | + fxor %%f52, %%f20, %%f52 | |
12042 | + fxor %%f54, %%f22, %%f54 | |
12043 | + fxor %%f56, %%f24, %%f56 | |
12044 | + fxor %%f58, %%f26, %%f58 | |
12045 | + fxor %%f60, %%f28, %%f60 | |
12046 | + fxor %%f62, %%f30, %%f62 | |
12047 | + membar #Sync | |
12048 | + fxor %%f48, %%f32, %%f48 | |
12049 | + fxor %%f50, %%f34, %%f50 | |
12050 | + fxor %%f52, %%f36, %%f52 | |
12051 | + fxor %%f54, %%f38, %%f54 | |
12052 | + fxor %%f56, %%f40, %%f56 | |
12053 | + fxor %%f58, %%f42, %%f58 | |
12054 | + fxor %%f60, %%f44, %%f60 | |
12055 | + fxor %%f62, %%f46, %%f62 | |
12056 | + stda %%f48, [%%o4] %3 | |
12057 | + membar #Sync|#StoreStore|#StoreLoad | |
12058 | + retl | |
12059 | + wr %%g0, 0, %%fprs | |
12060 | + " : : | |
12061 | + "i" (&((struct buffer_head *)0)->b_data), | |
12062 | + "i" (&((struct buffer_head *)0)->b_size), | |
12063 | + "i" (FPRS_FEF|FPRS_DU), "i" (ASI_BLK_P), | |
12064 | + "i" (FPRS_FEF), "i" (VISenter)); | |
12065 | +} | |
12066 | +#endif /* __sparc_v9__ */ | |
12067 | + | |
12068 | +#if defined(__sparc__) && !defined(__sparc_v9__) | |
12069 | +/* | |
12070 | + * High speed xor_block operation for RAID4/5 utilizing the | |
12071 | + * ldd/std SPARC instructions. | |
12072 | + * | |
12073 | + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) | |
12074 | + * | |
12075 | + */ | |
12076 | + | |
12077 | +XORBLOCK_TEMPLATE(SPARC) | |
12078 | +{ | |
12079 | + int size = bh_ptr[0]->b_size; | |
12080 | + int lines = size / (sizeof (long)) / 8, i; | |
12081 | + long *destp = (long *) bh_ptr[0]->b_data; | |
12082 | + long *source1 = (long *) bh_ptr[1]->b_data; | |
12083 | + long *source2, *source3, *source4; | |
12084 | + | |
12085 | + switch (count) { | |
12086 | + case 2: | |
12087 | + for (i = lines; i > 0; i--) { | |
12088 | + __asm__ __volatile__(" | |
12089 | + ldd [%0 + 0x00], %%g2 | |
12090 | + ldd [%0 + 0x08], %%g4 | |
12091 | + ldd [%0 + 0x10], %%o0 | |
12092 | + ldd [%0 + 0x18], %%o2 | |
12093 | + ldd [%1 + 0x00], %%o4 | |
12094 | + ldd [%1 + 0x08], %%l0 | |
12095 | + ldd [%1 + 0x10], %%l2 | |
12096 | + ldd [%1 + 0x18], %%l4 | |
12097 | + xor %%g2, %%o4, %%g2 | |
12098 | + xor %%g3, %%o5, %%g3 | |
12099 | + xor %%g4, %%l0, %%g4 | |
12100 | + xor %%g5, %%l1, %%g5 | |
12101 | + xor %%o0, %%l2, %%o0 | |
12102 | + xor %%o1, %%l3, %%o1 | |
12103 | + xor %%o2, %%l4, %%o2 | |
12104 | + xor %%o3, %%l5, %%o3 | |
12105 | + std %%g2, [%0 + 0x00] | |
12106 | + std %%g4, [%0 + 0x08] | |
12107 | + std %%o0, [%0 + 0x10] | |
12108 | + std %%o2, [%0 + 0x18] | |
12109 | + " : : "r" (destp), "r" (source1) : "g2", "g3", "g4", "g5", "o0", | |
12110 | + "o1", "o2", "o3", "o4", "o5", "l0", "l1", "l2", "l3", "l4", "l5"); | |
12111 | + destp += 8; | |
12112 | + source1 += 8; | |
12113 | + } | |
12114 | + break; | |
12115 | + case 3: | |
12116 | + source2 = (long *) bh_ptr[2]->b_data; | |
12117 | + for (i = lines; i > 0; i--) { | |
12118 | + __asm__ __volatile__(" | |
12119 | + ldd [%0 + 0x00], %%g2 | |
12120 | + ldd [%0 + 0x08], %%g4 | |
12121 | + ldd [%0 + 0x10], %%o0 | |
12122 | + ldd [%0 + 0x18], %%o2 | |
12123 | + ldd [%1 + 0x00], %%o4 | |
12124 | + ldd [%1 + 0x08], %%l0 | |
12125 | + ldd [%1 + 0x10], %%l2 | |
12126 | + ldd [%1 + 0x18], %%l4 | |
12127 | + xor %%g2, %%o4, %%g2 | |
12128 | + xor %%g3, %%o5, %%g3 | |
12129 | + ldd [%2 + 0x00], %%o4 | |
12130 | + xor %%g4, %%l0, %%g4 | |
12131 | + xor %%g5, %%l1, %%g5 | |
12132 | + ldd [%2 + 0x08], %%l0 | |
12133 | + xor %%o0, %%l2, %%o0 | |
12134 | + xor %%o1, %%l3, %%o1 | |
12135 | + ldd [%2 + 0x10], %%l2 | |
12136 | + xor %%o2, %%l4, %%o2 | |
12137 | + xor %%o3, %%l5, %%o3 | |
12138 | + ldd [%2 + 0x18], %%l4 | |
12139 | + xor %%g2, %%o4, %%g2 | |
12140 | + xor %%g3, %%o5, %%g3 | |
12141 | + xor %%g4, %%l0, %%g4 | |
12142 | + xor %%g5, %%l1, %%g5 | |
12143 | + xor %%o0, %%l2, %%o0 | |
12144 | + xor %%o1, %%l3, %%o1 | |
12145 | + xor %%o2, %%l4, %%o2 | |
12146 | + xor %%o3, %%l5, %%o3 | |
12147 | + std %%g2, [%0 + 0x00] | |
12148 | + std %%g4, [%0 + 0x08] | |
12149 | + std %%o0, [%0 + 0x10] | |
12150 | + std %%o2, [%0 + 0x18] | |
12151 | + " : : "r" (destp), "r" (source1), "r" (source2) | |
12152 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
12153 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
12154 | + destp += 8; | |
12155 | + source1 += 8; | |
12156 | + source2 += 8; | |
12157 | + } | |
12158 | + break; | |
12159 | + case 4: | |
12160 | + source2 = (long *) bh_ptr[2]->b_data; | |
12161 | + source3 = (long *) bh_ptr[3]->b_data; | |
12162 | + for (i = lines; i > 0; i--) { | |
12163 | + __asm__ __volatile__(" | |
12164 | + ldd [%0 + 0x00], %%g2 | |
12165 | + ldd [%0 + 0x08], %%g4 | |
12166 | + ldd [%0 + 0x10], %%o0 | |
12167 | + ldd [%0 + 0x18], %%o2 | |
12168 | + ldd [%1 + 0x00], %%o4 | |
12169 | + ldd [%1 + 0x08], %%l0 | |
12170 | + ldd [%1 + 0x10], %%l2 | |
12171 | + ldd [%1 + 0x18], %%l4 | |
12172 | + xor %%g2, %%o4, %%g2 | |
12173 | + xor %%g3, %%o5, %%g3 | |
12174 | + ldd [%2 + 0x00], %%o4 | |
12175 | + xor %%g4, %%l0, %%g4 | |
12176 | + xor %%g5, %%l1, %%g5 | |
12177 | + ldd [%2 + 0x08], %%l0 | |
12178 | + xor %%o0, %%l2, %%o0 | |
12179 | + xor %%o1, %%l3, %%o1 | |
12180 | + ldd [%2 + 0x10], %%l2 | |
12181 | + xor %%o2, %%l4, %%o2 | |
12182 | + xor %%o3, %%l5, %%o3 | |
12183 | + ldd [%2 + 0x18], %%l4 | |
12184 | + xor %%g2, %%o4, %%g2 | |
12185 | + xor %%g3, %%o5, %%g3 | |
12186 | + ldd [%3 + 0x00], %%o4 | |
12187 | + xor %%g4, %%l0, %%g4 | |
12188 | + xor %%g5, %%l1, %%g5 | |
12189 | + ldd [%3 + 0x08], %%l0 | |
12190 | + xor %%o0, %%l2, %%o0 | |
12191 | + xor %%o1, %%l3, %%o1 | |
12192 | + ldd [%3 + 0x10], %%l2 | |
12193 | + xor %%o2, %%l4, %%o2 | |
12194 | + xor %%o3, %%l5, %%o3 | |
12195 | + ldd [%3 + 0x18], %%l4 | |
12196 | + xor %%g2, %%o4, %%g2 | |
12197 | + xor %%g3, %%o5, %%g3 | |
12198 | + xor %%g4, %%l0, %%g4 | |
12199 | + xor %%g5, %%l1, %%g5 | |
12200 | + xor %%o0, %%l2, %%o0 | |
12201 | + xor %%o1, %%l3, %%o1 | |
12202 | + xor %%o2, %%l4, %%o2 | |
12203 | + xor %%o3, %%l5, %%o3 | |
12204 | + std %%g2, [%0 + 0x00] | |
12205 | + std %%g4, [%0 + 0x08] | |
12206 | + std %%o0, [%0 + 0x10] | |
12207 | + std %%o2, [%0 + 0x18] | |
12208 | + " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3) | |
12209 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
12210 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
12211 | + destp += 8; | |
12212 | + source1 += 8; | |
12213 | + source2 += 8; | |
12214 | + source3 += 8; | |
12215 | + } | |
12216 | + break; | |
12217 | + case 5: | |
12218 | + source2 = (long *) bh_ptr[2]->b_data; | |
12219 | + source3 = (long *) bh_ptr[3]->b_data; | |
12220 | + source4 = (long *) bh_ptr[4]->b_data; | |
12221 | + for (i = lines; i > 0; i--) { | |
12222 | + __asm__ __volatile__(" | |
12223 | + ldd [%0 + 0x00], %%g2 | |
12224 | + ldd [%0 + 0x08], %%g4 | |
12225 | + ldd [%0 + 0x10], %%o0 | |
12226 | + ldd [%0 + 0x18], %%o2 | |
12227 | + ldd [%1 + 0x00], %%o4 | |
12228 | + ldd [%1 + 0x08], %%l0 | |
12229 | + ldd [%1 + 0x10], %%l2 | |
12230 | + ldd [%1 + 0x18], %%l4 | |
12231 | + xor %%g2, %%o4, %%g2 | |
12232 | + xor %%g3, %%o5, %%g3 | |
12233 | + ldd [%2 + 0x00], %%o4 | |
12234 | + xor %%g4, %%l0, %%g4 | |
12235 | + xor %%g5, %%l1, %%g5 | |
12236 | + ldd [%2 + 0x08], %%l0 | |
12237 | + xor %%o0, %%l2, %%o0 | |
12238 | + xor %%o1, %%l3, %%o1 | |
12239 | + ldd [%2 + 0x10], %%l2 | |
12240 | + xor %%o2, %%l4, %%o2 | |
12241 | + xor %%o3, %%l5, %%o3 | |
12242 | + ldd [%2 + 0x18], %%l4 | |
12243 | + xor %%g2, %%o4, %%g2 | |
12244 | + xor %%g3, %%o5, %%g3 | |
12245 | + ldd [%3 + 0x00], %%o4 | |
12246 | + xor %%g4, %%l0, %%g4 | |
12247 | + xor %%g5, %%l1, %%g5 | |
12248 | + ldd [%3 + 0x08], %%l0 | |
12249 | + xor %%o0, %%l2, %%o0 | |
12250 | + xor %%o1, %%l3, %%o1 | |
12251 | + ldd [%3 + 0x10], %%l2 | |
12252 | + xor %%o2, %%l4, %%o2 | |
12253 | + xor %%o3, %%l5, %%o3 | |
12254 | + ldd [%3 + 0x18], %%l4 | |
12255 | + xor %%g2, %%o4, %%g2 | |
12256 | + xor %%g3, %%o5, %%g3 | |
12257 | + ldd [%4 + 0x00], %%o4 | |
12258 | + xor %%g4, %%l0, %%g4 | |
12259 | + xor %%g5, %%l1, %%g5 | |
12260 | + ldd [%4 + 0x08], %%l0 | |
12261 | + xor %%o0, %%l2, %%o0 | |
12262 | + xor %%o1, %%l3, %%o1 | |
12263 | + ldd [%4 + 0x10], %%l2 | |
12264 | + xor %%o2, %%l4, %%o2 | |
12265 | + xor %%o3, %%l5, %%o3 | |
12266 | + ldd [%4 + 0x18], %%l4 | |
12267 | + xor %%g2, %%o4, %%g2 | |
12268 | + xor %%g3, %%o5, %%g3 | |
12269 | + xor %%g4, %%l0, %%g4 | |
12270 | + xor %%g5, %%l1, %%g5 | |
12271 | + xor %%o0, %%l2, %%o0 | |
12272 | + xor %%o1, %%l3, %%o1 | |
12273 | + xor %%o2, %%l4, %%o2 | |
12274 | + xor %%o3, %%l5, %%o3 | |
12275 | + std %%g2, [%0 + 0x00] | |
12276 | + std %%g4, [%0 + 0x08] | |
12277 | + std %%o0, [%0 + 0x10] | |
12278 | + std %%o2, [%0 + 0x18] | |
12279 | + " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3), "r" (source4) | |
12280 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
12281 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
12282 | + destp += 8; | |
12283 | + source1 += 8; | |
12284 | + source2 += 8; | |
12285 | + source3 += 8; | |
12286 | + source4 += 8; | |
12287 | + } | |
12288 | + break; | |
12289 | + } | |
12290 | +} | |
12291 | +#endif /* __sparc_v[78]__ */ | |
12292 | + | |
12293 | +#ifndef __sparc_v9__ | |
12294 | + | |
12295 | +/* | |
12296 | + * this one works reasonably on any x86 CPU | |
12297 | + * (send me an assembly version for inclusion if you can make it faster) | |
12298 | + * | |
12299 | + * this one is just as fast as written in pure assembly on x86. | |
12300 | + * the reason for this separate version is that the | |
12301 | + * fast open-coded xor routine "32reg" produces suboptimal code | |
12302 | + * on x86, due to lack of registers. | |
12303 | + */ | |
12304 | +XORBLOCK_TEMPLATE(8regs) | |
12305 | +{ | |
12306 | + int len = bh_ptr[0]->b_size; | |
12307 | + long *destp = (long *) bh_ptr[0]->b_data; | |
12308 | + long *source1, *source2, *source3, *source4; | |
12309 | + long lines = len / (sizeof (long)) / 8, i; | |
12310 | + | |
12311 | + switch(count) { | |
12312 | + case 2: | |
12313 | + source1 = (long *) bh_ptr[1]->b_data; | |
12314 | + for (i = lines; i > 0; i--) { | |
12315 | + *(destp + 0) ^= *(source1 + 0); | |
12316 | + *(destp + 1) ^= *(source1 + 1); | |
12317 | + *(destp + 2) ^= *(source1 + 2); | |
12318 | + *(destp + 3) ^= *(source1 + 3); | |
12319 | + *(destp + 4) ^= *(source1 + 4); | |
12320 | + *(destp + 5) ^= *(source1 + 5); | |
12321 | + *(destp + 6) ^= *(source1 + 6); | |
12322 | + *(destp + 7) ^= *(source1 + 7); | |
12323 | + source1 += 8; | |
12324 | + destp += 8; | |
12325 | + } | |
12326 | + break; | |
12327 | + case 3: | |
12328 | + source2 = (long *) bh_ptr[2]->b_data; | |
12329 | + source1 = (long *) bh_ptr[1]->b_data; | |
12330 | + for (i = lines; i > 0; i--) { | |
12331 | + *(destp + 0) ^= *(source1 + 0); | |
12332 | + *(destp + 0) ^= *(source2 + 0); | |
12333 | + *(destp + 1) ^= *(source1 + 1); | |
12334 | + *(destp + 1) ^= *(source2 + 1); | |
12335 | + *(destp + 2) ^= *(source1 + 2); | |
12336 | + *(destp + 2) ^= *(source2 + 2); | |
12337 | + *(destp + 3) ^= *(source1 + 3); | |
12338 | + *(destp + 3) ^= *(source2 + 3); | |
12339 | + *(destp + 4) ^= *(source1 + 4); | |
12340 | + *(destp + 4) ^= *(source2 + 4); | |
12341 | + *(destp + 5) ^= *(source1 + 5); | |
12342 | + *(destp + 5) ^= *(source2 + 5); | |
12343 | + *(destp + 6) ^= *(source1 + 6); | |
12344 | + *(destp + 6) ^= *(source2 + 6); | |
12345 | + *(destp + 7) ^= *(source1 + 7); | |
12346 | + *(destp + 7) ^= *(source2 + 7); | |
12347 | + source1 += 8; | |
12348 | + source2 += 8; | |
12349 | + destp += 8; | |
12350 | + } | |
12351 | + break; | |
12352 | + case 4: | |
12353 | + source3 = (long *) bh_ptr[3]->b_data; | |
12354 | + source2 = (long *) bh_ptr[2]->b_data; | |
12355 | + source1 = (long *) bh_ptr[1]->b_data; | |
12356 | + for (i = lines; i > 0; i--) { | |
12357 | + *(destp + 0) ^= *(source1 + 0); | |
12358 | + *(destp + 0) ^= *(source2 + 0); | |
12359 | + *(destp + 0) ^= *(source3 + 0); | |
12360 | + *(destp + 1) ^= *(source1 + 1); | |
12361 | + *(destp + 1) ^= *(source2 + 1); | |
12362 | + *(destp + 1) ^= *(source3 + 1); | |
12363 | + *(destp + 2) ^= *(source1 + 2); | |
12364 | + *(destp + 2) ^= *(source2 + 2); | |
12365 | + *(destp + 2) ^= *(source3 + 2); | |
12366 | + *(destp + 3) ^= *(source1 + 3); | |
12367 | + *(destp + 3) ^= *(source2 + 3); | |
12368 | + *(destp + 3) ^= *(source3 + 3); | |
12369 | + *(destp + 4) ^= *(source1 + 4); | |
12370 | + *(destp + 4) ^= *(source2 + 4); | |
12371 | + *(destp + 4) ^= *(source3 + 4); | |
12372 | + *(destp + 5) ^= *(source1 + 5); | |
12373 | + *(destp + 5) ^= *(source2 + 5); | |
12374 | + *(destp + 5) ^= *(source3 + 5); | |
12375 | + *(destp + 6) ^= *(source1 + 6); | |
12376 | + *(destp + 6) ^= *(source2 + 6); | |
12377 | + *(destp + 6) ^= *(source3 + 6); | |
12378 | + *(destp + 7) ^= *(source1 + 7); | |
12379 | + *(destp + 7) ^= *(source2 + 7); | |
12380 | + *(destp + 7) ^= *(source3 + 7); | |
12381 | + source1 += 8; | |
12382 | + source2 += 8; | |
12383 | + source3 += 8; | |
12384 | + destp += 8; | |
12385 | + } | |
12386 | + break; | |
12387 | + case 5: | |
12388 | + source4 = (long *) bh_ptr[4]->b_data; | |
12389 | + source3 = (long *) bh_ptr[3]->b_data; | |
12390 | + source2 = (long *) bh_ptr[2]->b_data; | |
12391 | + source1 = (long *) bh_ptr[1]->b_data; | |
12392 | + for (i = lines; i > 0; i--) { | |
12393 | + *(destp + 0) ^= *(source1 + 0); | |
12394 | + *(destp + 0) ^= *(source2 + 0); | |
12395 | + *(destp + 0) ^= *(source3 + 0); | |
12396 | + *(destp + 0) ^= *(source4 + 0); | |
12397 | + *(destp + 1) ^= *(source1 + 1); | |
12398 | + *(destp + 1) ^= *(source2 + 1); | |
12399 | + *(destp + 1) ^= *(source3 + 1); | |
12400 | + *(destp + 1) ^= *(source4 + 1); | |
12401 | + *(destp + 2) ^= *(source1 + 2); | |
12402 | + *(destp + 2) ^= *(source2 + 2); | |
12403 | + *(destp + 2) ^= *(source3 + 2); | |
12404 | + *(destp + 2) ^= *(source4 + 2); | |
12405 | + *(destp + 3) ^= *(source1 + 3); | |
12406 | + *(destp + 3) ^= *(source2 + 3); | |
12407 | + *(destp + 3) ^= *(source3 + 3); | |
12408 | + *(destp + 3) ^= *(source4 + 3); | |
12409 | + *(destp + 4) ^= *(source1 + 4); | |
12410 | + *(destp + 4) ^= *(source2 + 4); | |
12411 | + *(destp + 4) ^= *(source3 + 4); | |
12412 | + *(destp + 4) ^= *(source4 + 4); | |
12413 | + *(destp + 5) ^= *(source1 + 5); | |
12414 | + *(destp + 5) ^= *(source2 + 5); | |
12415 | + *(destp + 5) ^= *(source3 + 5); | |
12416 | + *(destp + 5) ^= *(source4 + 5); | |
12417 | + *(destp + 6) ^= *(source1 + 6); | |
12418 | + *(destp + 6) ^= *(source2 + 6); | |
12419 | + *(destp + 6) ^= *(source3 + 6); | |
12420 | + *(destp + 6) ^= *(source4 + 6); | |
12421 | + *(destp + 7) ^= *(source1 + 7); | |
12422 | + *(destp + 7) ^= *(source2 + 7); | |
12423 | + *(destp + 7) ^= *(source3 + 7); | |
12424 | + *(destp + 7) ^= *(source4 + 7); | |
12425 | + source1 += 8; | |
12426 | + source2 += 8; | |
12427 | + source3 += 8; | |
12428 | + source4 += 8; | |
12429 | + destp += 8; | |
12430 | + } | |
12431 | + break; | |
12432 | + } | |
12433 | +} | |
12434 | + | |
12435 | +/* | |
12436 | + * platform independent RAID5 checksum calculation, this should | |
12437 | + * be very fast on any platform that has a decent amount of | |
12438 | + * registers. (32 or more) | |
12439 | + */ | |
12440 | +XORBLOCK_TEMPLATE(32regs) | |
12441 | +{ | |
12442 | + int size = bh_ptr[0]->b_size; | |
12443 | + int lines = size / (sizeof (long)) / 8, i; | |
12444 | + long *destp = (long *) bh_ptr[0]->b_data; | |
12445 | + long *source1, *source2, *source3, *source4; | |
12446 | + | |
12447 | + /* LOTS of registers available... | |
12448 | + We do explicite loop-unrolling here for code which | |
12449 | + favours RISC machines. In fact this is almoast direct | |
12450 | + RISC assembly on Alpha and SPARC :-) */ | |
12451 | + | |
12452 | + | |
12453 | + switch(count) { | |
12454 | + case 2: | |
12455 | + source1 = (long *) bh_ptr[1]->b_data; | |
12456 | + for (i = lines; i > 0; i--) { | |
12457 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
12458 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
12459 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
12460 | + d2 = destp[2]; | |
12461 | + d3 = destp[3]; | |
12462 | + d4 = destp[4]; | |
12463 | + d5 = destp[5]; | |
12464 | + d6 = destp[6]; | |
12465 | + d7 = destp[7]; | |
12466 | + d0 ^= source1[0]; | |
12467 | + d1 ^= source1[1]; | |
12468 | + d2 ^= source1[2]; | |
12469 | + d3 ^= source1[3]; | |
12470 | + d4 ^= source1[4]; | |
12471 | + d5 ^= source1[5]; | |
12472 | + d6 ^= source1[6]; | |
12473 | + d7 ^= source1[7]; | |
12474 | + destp[0] = d0; /* Store the result (in burts) */ | |
12475 | + destp[1] = d1; | |
12476 | + destp[2] = d2; | |
12477 | + destp[3] = d3; | |
12478 | + destp[4] = d4; /* Store the result (in burts) */ | |
12479 | + destp[5] = d5; | |
12480 | + destp[6] = d6; | |
12481 | + destp[7] = d7; | |
12482 | + source1 += 8; | |
12483 | + destp += 8; | |
12484 | + } | |
12485 | + break; | |
12486 | + case 3: | |
12487 | + source2 = (long *) bh_ptr[2]->b_data; | |
12488 | + source1 = (long *) bh_ptr[1]->b_data; | |
12489 | + for (i = lines; i > 0; i--) { | |
12490 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
12491 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
12492 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
12493 | + d2 = destp[2]; | |
12494 | + d3 = destp[3]; | |
12495 | + d4 = destp[4]; | |
12496 | + d5 = destp[5]; | |
12497 | + d6 = destp[6]; | |
12498 | + d7 = destp[7]; | |
12499 | + d0 ^= source1[0]; | |
12500 | + d1 ^= source1[1]; | |
12501 | + d2 ^= source1[2]; | |
12502 | + d3 ^= source1[3]; | |
12503 | + d4 ^= source1[4]; | |
12504 | + d5 ^= source1[5]; | |
12505 | + d6 ^= source1[6]; | |
12506 | + d7 ^= source1[7]; | |
12507 | + d0 ^= source2[0]; | |
12508 | + d1 ^= source2[1]; | |
12509 | + d2 ^= source2[2]; | |
12510 | + d3 ^= source2[3]; | |
12511 | + d4 ^= source2[4]; | |
12512 | + d5 ^= source2[5]; | |
12513 | + d6 ^= source2[6]; | |
12514 | + d7 ^= source2[7]; | |
12515 | + destp[0] = d0; /* Store the result (in burts) */ | |
12516 | + destp[1] = d1; | |
12517 | + destp[2] = d2; | |
12518 | + destp[3] = d3; | |
12519 | + destp[4] = d4; /* Store the result (in burts) */ | |
12520 | + destp[5] = d5; | |
12521 | + destp[6] = d6; | |
12522 | + destp[7] = d7; | |
12523 | + source1 += 8; | |
12524 | + source2 += 8; | |
12525 | + destp += 8; | |
12526 | + } | |
12527 | + break; | |
12528 | + case 4: | |
12529 | + source3 = (long *) bh_ptr[3]->b_data; | |
12530 | + source2 = (long *) bh_ptr[2]->b_data; | |
12531 | + source1 = (long *) bh_ptr[1]->b_data; | |
12532 | + for (i = lines; i > 0; i--) { | |
12533 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
12534 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
12535 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
12536 | + d2 = destp[2]; | |
12537 | + d3 = destp[3]; | |
12538 | + d4 = destp[4]; | |
12539 | + d5 = destp[5]; | |
12540 | + d6 = destp[6]; | |
12541 | + d7 = destp[7]; | |
12542 | + d0 ^= source1[0]; | |
12543 | + d1 ^= source1[1]; | |
12544 | + d2 ^= source1[2]; | |
12545 | + d3 ^= source1[3]; | |
12546 | + d4 ^= source1[4]; | |
12547 | + d5 ^= source1[5]; | |
12548 | + d6 ^= source1[6]; | |
12549 | + d7 ^= source1[7]; | |
12550 | + d0 ^= source2[0]; | |
12551 | + d1 ^= source2[1]; | |
12552 | + d2 ^= source2[2]; | |
12553 | + d3 ^= source2[3]; | |
12554 | + d4 ^= source2[4]; | |
12555 | + d5 ^= source2[5]; | |
12556 | + d6 ^= source2[6]; | |
12557 | + d7 ^= source2[7]; | |
12558 | + d0 ^= source3[0]; | |
12559 | + d1 ^= source3[1]; | |
12560 | + d2 ^= source3[2]; | |
12561 | + d3 ^= source3[3]; | |
12562 | + d4 ^= source3[4]; | |
12563 | + d5 ^= source3[5]; | |
12564 | + d6 ^= source3[6]; | |
12565 | + d7 ^= source3[7]; | |
12566 | + destp[0] = d0; /* Store the result (in bursts) */ | |
12567 | + destp[1] = d1; | |
12568 | + destp[2] = d2; | |
12569 | + destp[3] = d3; | |
12570 | + destp[4] = d4; /* Store the result (in bursts) */ | |
12571 | + destp[5] = d5; | |
12572 | + destp[6] = d6; | |
12573 | + destp[7] = d7; | |
12574 | + source1 += 8; | |
12575 | + source2 += 8; | |
12576 | + source3 += 8; | |
12577 | + destp += 8; | |
12578 | + } | |
12579 | + break; | |
12580 | + case 5: | |
12581 | + source4 = (long *) bh_ptr[4]->b_data; | |
12582 | + source3 = (long *) bh_ptr[3]->b_data; | |
12583 | + source2 = (long *) bh_ptr[2]->b_data; | |
12584 | + source1 = (long *) bh_ptr[1]->b_data; | |
12585 | + for (i = lines; i > 0; i--) { | |
12586 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
12587 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
12588 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
12589 | + d2 = destp[2]; | |
12590 | + d3 = destp[3]; | |
12591 | + d4 = destp[4]; | |
12592 | + d5 = destp[5]; | |
12593 | + d6 = destp[6]; | |
12594 | + d7 = destp[7]; | |
12595 | + d0 ^= source1[0]; | |
12596 | + d1 ^= source1[1]; | |
12597 | + d2 ^= source1[2]; | |
12598 | + d3 ^= source1[3]; | |
12599 | + d4 ^= source1[4]; | |
12600 | + d5 ^= source1[5]; | |
12601 | + d6 ^= source1[6]; | |
12602 | + d7 ^= source1[7]; | |
12603 | + d0 ^= source2[0]; | |
12604 | + d1 ^= source2[1]; | |
12605 | + d2 ^= source2[2]; | |
12606 | + d3 ^= source2[3]; | |
12607 | + d4 ^= source2[4]; | |
12608 | + d5 ^= source2[5]; | |
12609 | + d6 ^= source2[6]; | |
12610 | + d7 ^= source2[7]; | |
12611 | + d0 ^= source3[0]; | |
12612 | + d1 ^= source3[1]; | |
12613 | + d2 ^= source3[2]; | |
12614 | + d3 ^= source3[3]; | |
12615 | + d4 ^= source3[4]; | |
12616 | + d5 ^= source3[5]; | |
12617 | + d6 ^= source3[6]; | |
12618 | + d7 ^= source3[7]; | |
12619 | + d0 ^= source4[0]; | |
12620 | + d1 ^= source4[1]; | |
12621 | + d2 ^= source4[2]; | |
12622 | + d3 ^= source4[3]; | |
12623 | + d4 ^= source4[4]; | |
12624 | + d5 ^= source4[5]; | |
12625 | + d6 ^= source4[6]; | |
12626 | + d7 ^= source4[7]; | |
12627 | + destp[0] = d0; /* Store the result (in bursts) */ | |
12628 | + destp[1] = d1; | |
12629 | + destp[2] = d2; | |
12630 | + destp[3] = d3; | |
12631 | + destp[4] = d4; /* Store the result (in bursts) */ | |
12632 | + destp[5] = d5; | |
12633 | + destp[6] = d6; | |
12634 | + destp[7] = d7; | |
12635 | + source1 += 8; | |
12636 | + source2 += 8; | |
12637 | + source3 += 8; | |
12638 | + source4 += 8; | |
12639 | + destp += 8; | |
12640 | + } | |
12641 | + break; | |
12642 | + } | |
12643 | +} | |
12644 | + | |
12645 | +/* | |
12646 | + * (the -6*32 shift factor colors the cache) | |
12647 | + */ | |
12648 | +#define SIZE (PAGE_SIZE-6*32) | |
12649 | + | |
12650 | +static void xor_speed ( struct xor_block_template * func, | |
12651 | + struct buffer_head *b1, struct buffer_head *b2) | |
12652 | +{ | |
12653 | + int speed; | |
12654 | + unsigned long now; | |
12655 | + int i, count, max; | |
12656 | + struct buffer_head *bh_ptr[6]; | |
12657 | + | |
12658 | + func->next = xor_functions; | |
12659 | + xor_functions = func; | |
12660 | + bh_ptr[0] = b1; | |
12661 | + bh_ptr[1] = b2; | |
12662 | + | |
12663 | + /* | |
12664 | + * count the number of XORs done during a whole jiffy. | |
12665 | + * calculate the speed of checksumming from this. | |
12666 | + * (we use a 2-page allocation to have guaranteed | |
12667 | + * color L1-cache layout) | |
12668 | + */ | |
12669 | + max = 0; | |
12670 | + for (i = 0; i < 5; i++) { | |
12671 | + now = jiffies; | |
12672 | + count = 0; | |
12673 | + while (jiffies == now) { | |
12674 | + mb(); | |
12675 | + func->xor_block(2,bh_ptr); | |
12676 | + mb(); | |
12677 | + count++; | |
12678 | + mb(); | |
12679 | + } | |
12680 | + if (count > max) | |
12681 | + max = count; | |
12682 | + } | |
12683 | + | |
12684 | + speed = max * (HZ*SIZE/1024); | |
12685 | + func->speed = speed; | |
12686 | + | |
12687 | + printk( " %-10s: %5d.%03d MB/sec\n", func->name, | |
12688 | + speed / 1000, speed % 1000); | |
12689 | +} | |
12690 | + | |
12691 | +static inline void pick_fastest_function(void) | |
12692 | +{ | |
12693 | + struct xor_block_template *f, *fastest; | |
12694 | + | |
12695 | + fastest = xor_functions; | |
12696 | + for (f = fastest; f; f = f->next) { | |
12697 | + if (f->speed > fastest->speed) | |
12698 | + fastest = f; | |
12699 | + } | |
12700 | +#ifdef CONFIG_X86_XMM | |
12701 | + if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { | |
12702 | + fastest = &t_xor_block_pIII_kni; | |
12703 | + } | |
12704 | +#endif | |
12705 | + xor_block = fastest->xor_block; | |
12706 | + printk( "using fastest function: %s (%d.%03d MB/sec)\n", fastest->name, | |
12707 | + fastest->speed / 1000, fastest->speed % 1000); | |
12708 | +} | |
12709 | + | |
12710 | + | |
12711 | +void calibrate_xor_block(void) | |
12712 | +{ | |
12713 | + struct buffer_head b1, b2; | |
12714 | + | |
12715 | + memset(&b1,0,sizeof(b1)); | |
12716 | + b2 = b1; | |
12717 | + | |
12718 | + b1.b_data = (char *) md__get_free_pages(GFP_KERNEL,2); | |
12719 | + if (!b1.b_data) { | |
12720 | + pick_fastest_function(); | |
12721 | + return; | |
12722 | + } | |
12723 | + b2.b_data = b1.b_data + 2*PAGE_SIZE + SIZE; | |
12724 | + | |
12725 | + b1.b_size = SIZE; | |
12726 | + | |
12727 | + printk(KERN_INFO "raid5: measuring checksumming speed\n"); | |
12728 | + | |
12729 | + sti(); /* should be safe */ | |
12730 | + | |
12731 | +#if defined(__sparc__) && !defined(__sparc_v9__) | |
12732 | + printk(KERN_INFO "raid5: trying high-speed SPARC checksum routine\n"); | |
12733 | + xor_speed(&t_xor_block_SPARC,&b1,&b2); | |
12734 | +#endif | |
12735 | + | |
12736 | +#ifdef CONFIG_X86_XMM | |
12737 | + if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { | |
12738 | + printk(KERN_INFO | |
12739 | + "raid5: KNI detected, trying cache-avoiding KNI checksum routine\n"); | |
12740 | + /* we force the use of the KNI xor block because it | |
12741 | + can write around l2. we may also be able | |
12742 | + to load into the l1 only depending on how | |
12743 | + the cpu deals with a load to a line that is | |
12744 | + being prefetched. | |
12745 | + */ | |
12746 | + xor_speed(&t_xor_block_pIII_kni,&b1,&b2); | |
12747 | + } | |
12748 | +#endif /* CONFIG_X86_XMM */ | |
12749 | + | |
12750 | +#ifdef __i386__ | |
12751 | + | |
12752 | + if (md_cpu_has_mmx()) { | |
12753 | + printk(KERN_INFO | |
12754 | + "raid5: MMX detected, trying high-speed MMX checksum routines\n"); | |
12755 | + xor_speed(&t_xor_block_pII_mmx,&b1,&b2); | |
12756 | + xor_speed(&t_xor_block_p5_mmx,&b1,&b2); | |
12757 | + } | |
12758 | + | |
12759 | +#endif /* __i386__ */ | |
12760 | + | |
12761 | + | |
12762 | + xor_speed(&t_xor_block_8regs,&b1,&b2); | |
12763 | + xor_speed(&t_xor_block_32regs,&b1,&b2); | |
12764 | + | |
12765 | + free_pages((unsigned long)b1.b_data,2); | |
12766 | + pick_fastest_function(); | |
12767 | +} | |
12768 | + | |
12769 | +#else /* __sparc_v9__ */ | |
12770 | + | |
12771 | +void calibrate_xor_block(void) | |
12772 | +{ | |
12773 | + printk(KERN_INFO "raid5: using high-speed VIS checksum routine\n"); | |
12774 | + xor_block = xor_block_VIS; | |
12775 | +} | |
12776 | + | |
12777 | +#endif /* __sparc_v9__ */ | |
12778 | + | |
12779 | +MD_EXPORT_SYMBOL(xor_block); | |
12780 | + | |
12781 | diff -ruN linux.orig/include/asm-alpha/md.h linux-2.2.16/include/asm-alpha/md.h | |
12782 | --- linux.orig/include/asm-alpha/md.h Fri May 8 09:17:13 1998 | |
12783 | +++ linux-2.2.16/include/asm-alpha/md.h Thu Jan 1 01:00:00 1970 | |
12784 | @@ -1,13 +0,0 @@ | |
12785 | -/* $Id$ | |
12786 | - * md.h: High speed xor_block operation for RAID4/5 | |
12787 | - * | |
12788 | - */ | |
12789 | - | |
12790 | -#ifndef __ASM_MD_H | |
12791 | -#define __ASM_MD_H | |
12792 | - | |
12793 | -/* #define HAVE_ARCH_XORBLOCK */ | |
12794 | - | |
12795 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
12796 | - | |
12797 | -#endif /* __ASM_MD_H */ | |
12798 | diff -ruN linux.orig/include/asm-i386/md.h linux-2.2.16/include/asm-i386/md.h | |
12799 | --- linux.orig/include/asm-i386/md.h Fri May 8 09:17:13 1998 | |
12800 | +++ linux-2.2.16/include/asm-i386/md.h Thu Jan 1 01:00:00 1970 | |
12801 | @@ -1,13 +0,0 @@ | |
12802 | -/* $Id$ | |
12803 | - * md.h: High speed xor_block operation for RAID4/5 | |
12804 | - * | |
12805 | - */ | |
12806 | - | |
12807 | -#ifndef __ASM_MD_H | |
12808 | -#define __ASM_MD_H | |
12809 | - | |
12810 | -/* #define HAVE_ARCH_XORBLOCK */ | |
12811 | - | |
12812 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
12813 | - | |
12814 | -#endif /* __ASM_MD_H */ | |
12815 | diff -ruN linux.orig/include/asm-m68k/md.h linux-2.2.16/include/asm-m68k/md.h | |
12816 | --- linux.orig/include/asm-m68k/md.h Fri May 8 09:15:22 1998 | |
12817 | +++ linux-2.2.16/include/asm-m68k/md.h Thu Jan 1 01:00:00 1970 | |
12818 | @@ -1,13 +0,0 @@ | |
12819 | -/* $Id$ | |
12820 | - * md.h: High speed xor_block operation for RAID4/5 | |
12821 | - * | |
12822 | - */ | |
12823 | - | |
12824 | -#ifndef __ASM_MD_H | |
12825 | -#define __ASM_MD_H | |
12826 | - | |
12827 | -/* #define HAVE_ARCH_XORBLOCK */ | |
12828 | - | |
12829 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
12830 | - | |
12831 | -#endif /* __ASM_MD_H */ | |
12832 | diff -ruN linux.orig/include/asm-ppc/md.h linux-2.2.16/include/asm-ppc/md.h | |
12833 | --- linux.orig/include/asm-ppc/md.h Wed Oct 27 02:53:42 1999 | |
12834 | +++ linux-2.2.16/include/asm-ppc/md.h Thu Jan 1 01:00:00 1970 | |
12835 | @@ -1,13 +0,0 @@ | |
12836 | -/* $Id$ | |
12837 | - * md.h: High speed xor_block operation for RAID4/5 | |
12838 | - * | |
12839 | - */ | |
12840 | - | |
12841 | -#ifndef __ASM_MD_H | |
12842 | -#define __ASM_MD_H | |
12843 | - | |
12844 | -/* #define HAVE_ARCH_XORBLOCK */ | |
12845 | - | |
12846 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
12847 | - | |
12848 | -#endif /* __ASM_MD_H */ | |
12849 | diff -ruN linux.orig/include/asm-sparc/md.h linux-2.2.16/include/asm-sparc/md.h | |
12850 | --- linux.orig/include/asm-sparc/md.h Tue Jan 13 00:15:54 1998 | |
12851 | +++ linux-2.2.16/include/asm-sparc/md.h Thu Jan 1 01:00:00 1970 | |
12852 | @@ -1,13 +0,0 @@ | |
12853 | -/* $Id$ | |
12854 | - * md.h: High speed xor_block operation for RAID4/5 | |
12855 | - * | |
12856 | - */ | |
12857 | - | |
12858 | -#ifndef __ASM_MD_H | |
12859 | -#define __ASM_MD_H | |
12860 | - | |
12861 | -/* #define HAVE_ARCH_XORBLOCK */ | |
12862 | - | |
12863 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
12864 | - | |
12865 | -#endif /* __ASM_MD_H */ | |
12866 | diff -ruN linux.orig/include/asm-sparc64/md.h linux-2.2.16/include/asm-sparc64/md.h | |
12867 | --- linux.orig/include/asm-sparc64/md.h Tue Jan 13 00:15:58 1998 | |
12868 | +++ linux-2.2.16/include/asm-sparc64/md.h Thu Jan 1 01:00:00 1970 | |
12869 | @@ -1,91 +0,0 @@ | |
12870 | -/* $Id$ | |
12871 | - * md.h: High speed xor_block operation for RAID4/5 | |
12872 | - * utilizing the UltraSparc Visual Instruction Set. | |
12873 | - * | |
12874 | - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | |
12875 | - */ | |
12876 | - | |
12877 | -#ifndef __ASM_MD_H | |
12878 | -#define __ASM_MD_H | |
12879 | - | |
12880 | -#include <asm/head.h> | |
12881 | -#include <asm/asi.h> | |
12882 | - | |
12883 | -#define HAVE_ARCH_XORBLOCK | |
12884 | - | |
12885 | -#define MD_XORBLOCK_ALIGNMENT 64 | |
12886 | - | |
12887 | -/* void __xor_block (char *dest, char *src, long len) | |
12888 | - * { | |
12889 | - * while (len--) *dest++ ^= *src++; | |
12890 | - * } | |
12891 | - * | |
12892 | - * Requirements: | |
12893 | - * !(((long)dest | (long)src) & (MD_XORBLOCK_ALIGNMENT - 1)) && | |
12894 | - * !(len & 127) && len >= 256 | |
12895 | - */ | |
12896 | - | |
12897 | -static inline void __xor_block (char *dest, char *src, long len) | |
12898 | -{ | |
12899 | - __asm__ __volatile__ (" | |
12900 | - wr %%g0, %3, %%fprs | |
12901 | - wr %%g0, %4, %%asi | |
12902 | - membar #LoadStore|#StoreLoad|#StoreStore | |
12903 | - sub %2, 128, %2 | |
12904 | - ldda [%0] %4, %%f0 | |
12905 | - ldda [%1] %4, %%f16 | |
12906 | -1: ldda [%0 + 64] %%asi, %%f32 | |
12907 | - fxor %%f0, %%f16, %%f16 | |
12908 | - fxor %%f2, %%f18, %%f18 | |
12909 | - fxor %%f4, %%f20, %%f20 | |
12910 | - fxor %%f6, %%f22, %%f22 | |
12911 | - fxor %%f8, %%f24, %%f24 | |
12912 | - fxor %%f10, %%f26, %%f26 | |
12913 | - fxor %%f12, %%f28, %%f28 | |
12914 | - fxor %%f14, %%f30, %%f30 | |
12915 | - stda %%f16, [%0] %4 | |
12916 | - ldda [%1 + 64] %%asi, %%f48 | |
12917 | - ldda [%0 + 128] %%asi, %%f0 | |
12918 | - fxor %%f32, %%f48, %%f48 | |
12919 | - fxor %%f34, %%f50, %%f50 | |
12920 | - add %0, 128, %0 | |
12921 | - fxor %%f36, %%f52, %%f52 | |
12922 | - add %1, 128, %1 | |
12923 | - fxor %%f38, %%f54, %%f54 | |
12924 | - subcc %2, 128, %2 | |
12925 | - fxor %%f40, %%f56, %%f56 | |
12926 | - fxor %%f42, %%f58, %%f58 | |
12927 | - fxor %%f44, %%f60, %%f60 | |
12928 | - fxor %%f46, %%f62, %%f62 | |
12929 | - stda %%f48, [%0 - 64] %%asi | |
12930 | - bne,pt %%xcc, 1b | |
12931 | - ldda [%1] %4, %%f16 | |
12932 | - ldda [%0 + 64] %%asi, %%f32 | |
12933 | - fxor %%f0, %%f16, %%f16 | |
12934 | - fxor %%f2, %%f18, %%f18 | |
12935 | - fxor %%f4, %%f20, %%f20 | |
12936 | - fxor %%f6, %%f22, %%f22 | |
12937 | - fxor %%f8, %%f24, %%f24 | |
12938 | - fxor %%f10, %%f26, %%f26 | |
12939 | - fxor %%f12, %%f28, %%f28 | |
12940 | - fxor %%f14, %%f30, %%f30 | |
12941 | - stda %%f16, [%0] %4 | |
12942 | - ldda [%1 + 64] %%asi, %%f48 | |
12943 | - membar #Sync | |
12944 | - fxor %%f32, %%f48, %%f48 | |
12945 | - fxor %%f34, %%f50, %%f50 | |
12946 | - fxor %%f36, %%f52, %%f52 | |
12947 | - fxor %%f38, %%f54, %%f54 | |
12948 | - fxor %%f40, %%f56, %%f56 | |
12949 | - fxor %%f42, %%f58, %%f58 | |
12950 | - fxor %%f44, %%f60, %%f60 | |
12951 | - fxor %%f46, %%f62, %%f62 | |
12952 | - stda %%f48, [%0 + 64] %%asi | |
12953 | - membar #Sync|#StoreStore|#StoreLoad | |
12954 | - wr %%g0, 0, %%fprs | |
12955 | - " : : | |
12956 | - "r" (dest), "r" (src), "r" (len), "i" (FPRS_FEF), "i" (ASI_BLK_P) : | |
12957 | - "cc", "memory"); | |
12958 | -} | |
12959 | - | |
12960 | -#endif /* __ASM_MD_H */ | |
12961 | Binary files linux.orig/include/linux/.sysctl.h.rej.swp and linux-2.2.16/include/linux/.sysctl.h.rej.swp differ | |
12962 | diff -ruN linux.orig/include/linux/blkdev.h linux-2.2.16/include/linux/blkdev.h | |
12963 | --- linux.orig/include/linux/blkdev.h Wed Jun 7 23:26:44 2000 | |
12964 | +++ linux-2.2.16/include/linux/blkdev.h Fri Jun 9 11:37:44 2000 | |
12965 | @@ -93,8 +93,9 @@ | |
12966 | extern void make_request(int major,int rw, struct buffer_head * bh); | |
12967 | ||
12968 | /* md needs this function to remap requests */ | |
12969 | -extern int md_map (int minor, kdev_t *rdev, unsigned long *rsector, unsigned long size); | |
12970 | -extern int md_make_request (int minor, int rw, struct buffer_head * bh); | |
12971 | +extern int md_map (kdev_t dev, kdev_t *rdev, | |
12972 | + unsigned long *rsector, unsigned long size); | |
12973 | +extern int md_make_request (struct buffer_head * bh, int rw); | |
12974 | extern int md_error (kdev_t mddev, kdev_t rdev); | |
12975 | ||
12976 | extern int * blk_size[MAX_BLKDEV]; | |
12977 | diff -ruN linux.orig/include/linux/fs.h linux-2.2.16/include/linux/fs.h | |
12978 | --- linux.orig/include/linux/fs.h Wed Jun 7 23:26:44 2000 | |
12979 | +++ linux-2.2.16/include/linux/fs.h Fri Jun 9 11:37:44 2000 | |
12980 | @@ -185,6 +185,7 @@ | |
12981 | #define BH_Lock 2 /* 1 if the buffer is locked */ | |
12982 | #define BH_Req 3 /* 0 if the buffer has been invalidated */ | |
12983 | #define BH_Protected 6 /* 1 if the buffer is protected */ | |
12984 | +#define BH_LowPrio 7 /* 1 if the buffer is lowprio */ | |
12985 | ||
12986 | /* | |
12987 | * Try to keep the most commonly used fields in single cache lines (16 | |
12988 | @@ -755,6 +756,7 @@ | |
12989 | extern void refile_buffer(struct buffer_head * buf); | |
12990 | extern void set_writetime(struct buffer_head * buf, int flag); | |
12991 | extern int try_to_free_buffers(struct page *); | |
12992 | +extern void cache_drop_behind(struct buffer_head *bh); | |
12993 | ||
12994 | extern int nr_buffers; | |
12995 | extern long buffermem; | |
12996 | @@ -775,6 +777,25 @@ | |
12997 | } | |
12998 | } | |
12999 | ||
13000 | +extern inline void mark_buffer_highprio(struct buffer_head * bh) | |
13001 | +{ | |
13002 | + clear_bit(BH_LowPrio, &bh->b_state); | |
13003 | +} | |
13004 | + | |
13005 | +extern inline void mark_buffer_lowprio(struct buffer_head * bh) | |
13006 | +{ | |
13007 | + /* | |
13008 | + * dirty buffers cannot be marked lowprio. | |
13009 | + */ | |
13010 | + if (!buffer_dirty(bh)) | |
13011 | + set_bit(BH_LowPrio, &bh->b_state); | |
13012 | +} | |
13013 | + | |
13014 | +static inline int buffer_lowprio(struct buffer_head * bh) | |
13015 | +{ | |
13016 | + return test_bit(BH_LowPrio, &bh->b_state); | |
13017 | +} | |
13018 | + | |
13019 | extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) | |
13020 | { | |
13021 | if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { | |
13022 | @@ -782,6 +803,23 @@ | |
13023 | if (bh->b_list != BUF_DIRTY) | |
13024 | refile_buffer(bh); | |
13025 | } | |
13026 | + /* | |
13027 | + * if a buffer gets marked dirty then it has to lose | |
13028 | + * its lowprio state. | |
13029 | + */ | |
13030 | + mark_buffer_highprio(bh); | |
13031 | +} | |
13032 | + | |
13033 | +extern inline void mark_buffer_dirty_lowprio(struct buffer_head * bh) | |
13034 | +{ | |
13035 | + if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { | |
13036 | + if (bh->b_list != BUF_DIRTY) | |
13037 | + refile_buffer(bh); | |
13038 | + /* | |
13039 | + * Mark it lowprio only if it was not dirty before! | |
13040 | + */ | |
13041 | + set_bit(BH_LowPrio, &bh->b_state); | |
13042 | + } | |
13043 | } | |
13044 | ||
13045 | extern int check_disk_change(kdev_t dev); | |
13046 | @@ -855,6 +893,7 @@ | |
13047 | extern struct buffer_head * find_buffer(kdev_t dev, int block, int size); | |
13048 | extern void ll_rw_block(int, int, struct buffer_head * bh[]); | |
13049 | extern int is_read_only(kdev_t); | |
13050 | +extern int is_device_idle(kdev_t); | |
13051 | extern void __brelse(struct buffer_head *); | |
13052 | extern inline void brelse(struct buffer_head *buf) | |
13053 | { | |
13054 | @@ -870,8 +909,12 @@ | |
13055 | extern void set_blocksize(kdev_t dev, int size); | |
13056 | extern unsigned int get_hardblocksize(kdev_t dev); | |
13057 | extern struct buffer_head * bread(kdev_t dev, int block, int size); | |
13058 | +extern struct buffer_head * buffer_ready (kdev_t dev, int block, int size); | |
13059 | +extern void bread_ahead (kdev_t dev, int block, int size); | |
13060 | extern struct buffer_head * breada(kdev_t dev,int block, int size, | |
13061 | unsigned int pos, unsigned int filesize); | |
13062 | +extern struct buffer_head * breada_blocks(kdev_t dev,int block, | |
13063 | + int size, int blocks); | |
13064 | ||
13065 | extern int brw_page(int, struct page *, kdev_t, int [], int, int); | |
13066 | ||
13067 | diff -ruN linux.orig/include/linux/md.h linux-2.2.16/include/linux/md.h | |
13068 | --- linux.orig/include/linux/md.h Fri May 8 09:17:13 1998 | |
13069 | +++ linux-2.2.16/include/linux/md.h Thu Jan 1 01:00:00 1970 | |
13070 | @@ -1,300 +0,0 @@ | |
13071 | -/* | |
13072 | - md.h : Multiple Devices driver for Linux | |
13073 | - Copyright (C) 1994-96 Marc ZYNGIER | |
13074 | - <zyngier@ufr-info-p7.ibp.fr> or | |
13075 | - <maz@gloups.fdn.fr> | |
13076 | - | |
13077 | - This program is free software; you can redistribute it and/or modify | |
13078 | - it under the terms of the GNU General Public License as published by | |
13079 | - the Free Software Foundation; either version 2, or (at your option) | |
13080 | - any later version. | |
13081 | - | |
13082 | - You should have received a copy of the GNU General Public License | |
13083 | - (for example /usr/src/linux/COPYING); if not, write to the Free | |
13084 | - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
13085 | -*/ | |
13086 | - | |
13087 | -#ifndef _MD_H | |
13088 | -#define _MD_H | |
13089 | - | |
13090 | -#include <linux/major.h> | |
13091 | -#include <linux/ioctl.h> | |
13092 | -#include <linux/types.h> | |
13093 | - | |
13094 | -/* | |
13095 | - * Different major versions are not compatible. | |
13096 | - * Different minor versions are only downward compatible. | |
13097 | - * Different patchlevel versions are downward and upward compatible. | |
13098 | - */ | |
13099 | -#define MD_MAJOR_VERSION 0 | |
13100 | -#define MD_MINOR_VERSION 36 | |
13101 | -#define MD_PATCHLEVEL_VERSION 6 | |
13102 | - | |
13103 | -#define MD_DEFAULT_DISK_READAHEAD (256 * 1024) | |
13104 | - | |
13105 | -/* ioctls */ | |
13106 | -#define REGISTER_DEV _IO (MD_MAJOR, 1) | |
13107 | -#define START_MD _IO (MD_MAJOR, 2) | |
13108 | -#define STOP_MD _IO (MD_MAJOR, 3) | |
13109 | -#define REGISTER_DEV_NEW _IO (MD_MAJOR, 4) | |
13110 | - | |
13111 | -/* | |
13112 | - personalities : | |
13113 | - Byte 0 : Chunk size factor | |
13114 | - Byte 1 : Fault tolerance count for each physical device | |
13115 | - ( 0 means no fault tolerance, | |
13116 | - 0xFF means always tolerate faults), not used by now. | |
13117 | - Byte 2 : Personality | |
13118 | - Byte 3 : Reserved. | |
13119 | - */ | |
13120 | - | |
13121 | -#define FAULT_SHIFT 8 | |
13122 | -#define PERSONALITY_SHIFT 16 | |
13123 | - | |
13124 | -#define FACTOR_MASK 0x000000FFUL | |
13125 | -#define FAULT_MASK 0x0000FF00UL | |
13126 | -#define PERSONALITY_MASK 0x00FF0000UL | |
13127 | - | |
13128 | -#define MD_RESERVED 0 /* Not used by now */ | |
13129 | -#define LINEAR (1UL << PERSONALITY_SHIFT) | |
13130 | -#define STRIPED (2UL << PERSONALITY_SHIFT) | |
13131 | -#define RAID0 STRIPED | |
13132 | -#define RAID1 (3UL << PERSONALITY_SHIFT) | |
13133 | -#define RAID5 (4UL << PERSONALITY_SHIFT) | |
13134 | -#define MAX_PERSONALITY 5 | |
13135 | - | |
13136 | -/* | |
13137 | - * MD superblock. | |
13138 | - * | |
13139 | - * The MD superblock maintains some statistics on each MD configuration. | |
13140 | - * Each real device in the MD set contains it near the end of the device. | |
13141 | - * Some of the ideas are copied from the ext2fs implementation. | |
13142 | - * | |
13143 | - * We currently use 4096 bytes as follows: | |
13144 | - * | |
13145 | - * word offset function | |
13146 | - * | |
13147 | - * 0 - 31 Constant generic MD device information. | |
13148 | - * 32 - 63 Generic state information. | |
13149 | - * 64 - 127 Personality specific information. | |
13150 | - * 128 - 511 12 32-words descriptors of the disks in the raid set. | |
13151 | - * 512 - 911 Reserved. | |
13152 | - * 912 - 1023 Disk specific descriptor. | |
13153 | - */ | |
13154 | - | |
13155 | -/* | |
13156 | - * If x is the real device size in bytes, we return an apparent size of: | |
13157 | - * | |
13158 | - * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES | |
13159 | - * | |
13160 | - * and place the 4kB superblock at offset y. | |
13161 | - */ | |
13162 | -#define MD_RESERVED_BYTES (64 * 1024) | |
13163 | -#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) | |
13164 | -#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE) | |
13165 | - | |
13166 | -#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) | |
13167 | -#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS) | |
13168 | - | |
13169 | -#define MD_SB_BYTES 4096 | |
13170 | -#define MD_SB_WORDS (MD_SB_BYTES / 4) | |
13171 | -#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE) | |
13172 | -#define MD_SB_SECTORS (MD_SB_BYTES / 512) | |
13173 | - | |
13174 | -/* | |
13175 | - * The following are counted in 32-bit words | |
13176 | - */ | |
13177 | -#define MD_SB_GENERIC_OFFSET 0 | |
13178 | -#define MD_SB_PERSONALITY_OFFSET 64 | |
13179 | -#define MD_SB_DISKS_OFFSET 128 | |
13180 | -#define MD_SB_DESCRIPTOR_OFFSET 992 | |
13181 | - | |
13182 | -#define MD_SB_GENERIC_CONSTANT_WORDS 32 | |
13183 | -#define MD_SB_GENERIC_STATE_WORDS 32 | |
13184 | -#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) | |
13185 | -#define MD_SB_PERSONALITY_WORDS 64 | |
13186 | -#define MD_SB_DISKS_WORDS 384 | |
13187 | -#define MD_SB_DESCRIPTOR_WORDS 32 | |
13188 | -#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) | |
13189 | -#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) | |
13190 | -#define MD_SB_DISKS (MD_SB_DISKS_WORDS / MD_SB_DESCRIPTOR_WORDS) | |
13191 | - | |
13192 | -/* | |
13193 | - * Device "operational" state bits | |
13194 | - */ | |
13195 | -#define MD_FAULTY_DEVICE 0 /* Device is faulty / operational */ | |
13196 | -#define MD_ACTIVE_DEVICE 1 /* Device is a part or the raid set / spare disk */ | |
13197 | -#define MD_SYNC_DEVICE 2 /* Device is in sync with the raid set */ | |
13198 | - | |
13199 | -typedef struct md_device_descriptor_s { | |
13200 | - __u32 number; /* 0 Device number in the entire set */ | |
13201 | - __u32 major; /* 1 Device major number */ | |
13202 | - __u32 minor; /* 2 Device minor number */ | |
13203 | - __u32 raid_disk; /* 3 The role of the device in the raid set */ | |
13204 | - __u32 state; /* 4 Operational state */ | |
13205 | - __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; | |
13206 | -} md_descriptor_t; | |
13207 | - | |
13208 | -#define MD_SB_MAGIC 0xa92b4efc | |
13209 | - | |
13210 | -/* | |
13211 | - * Superblock state bits | |
13212 | - */ | |
13213 | -#define MD_SB_CLEAN 0 | |
13214 | -#define MD_SB_ERRORS 1 | |
13215 | - | |
13216 | -typedef struct md_superblock_s { | |
13217 | - | |
13218 | - /* | |
13219 | - * Constant generic information | |
13220 | - */ | |
13221 | - __u32 md_magic; /* 0 MD identifier */ | |
13222 | - __u32 major_version; /* 1 major version to which the set conforms */ | |
13223 | - __u32 minor_version; /* 2 minor version to which the set conforms */ | |
13224 | - __u32 patch_version; /* 3 patchlevel version to which the set conforms */ | |
13225 | - __u32 gvalid_words; /* 4 Number of non-reserved words in this section */ | |
13226 | - __u32 set_magic; /* 5 Raid set identifier */ | |
13227 | - __u32 ctime; /* 6 Creation time */ | |
13228 | - __u32 level; /* 7 Raid personality (mirroring, raid5, ...) */ | |
13229 | - __u32 size; /* 8 Apparent size of each individual disk, in kB */ | |
13230 | - __u32 nr_disks; /* 9 Number of total disks in the raid set */ | |
13231 | - __u32 raid_disks; /* 10 Number of disks in a fully functional raid set */ | |
13232 | - __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 11]; | |
13233 | - | |
13234 | - /* | |
13235 | - * Generic state information | |
13236 | - */ | |
13237 | - __u32 utime; /* 0 Superblock update time */ | |
13238 | - __u32 state; /* 1 State bits (clean, ...) */ | |
13239 | - __u32 active_disks; /* 2 Number of currently active disks (some non-faulty disks might not be in sync) */ | |
13240 | - __u32 working_disks; /* 3 Number of working disks */ | |
13241 | - __u32 failed_disks; /* 4 Number of failed disks */ | |
13242 | - __u32 spare_disks; /* 5 Number of spare disks */ | |
13243 | - __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 6]; | |
13244 | - | |
13245 | - /* | |
13246 | - * Personality information | |
13247 | - */ | |
13248 | - __u32 parity_algorithm; | |
13249 | - __u32 chunk_size; | |
13250 | - __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 2]; | |
13251 | - | |
13252 | - /* | |
13253 | - * Disks information | |
13254 | - */ | |
13255 | - md_descriptor_t disks[MD_SB_DISKS]; | |
13256 | - | |
13257 | - /* | |
13258 | - * Reserved | |
13259 | - */ | |
13260 | - __u32 reserved[MD_SB_RESERVED_WORDS]; | |
13261 | - | |
13262 | - /* | |
13263 | - * Active descriptor | |
13264 | - */ | |
13265 | - md_descriptor_t descriptor; | |
13266 | -} md_superblock_t; | |
13267 | - | |
13268 | -#ifdef __KERNEL__ | |
13269 | - | |
13270 | -#include <linux/mm.h> | |
13271 | -#include <linux/fs.h> | |
13272 | -#include <linux/blkdev.h> | |
13273 | -#include <asm/semaphore.h> | |
13274 | - | |
13275 | -/* | |
13276 | - * Kernel-based reconstruction is mostly working, but still requires | |
13277 | - * some additional work. | |
13278 | - */ | |
13279 | -#define SUPPORT_RECONSTRUCTION 0 | |
13280 | - | |
13281 | -#define MAX_REAL 8 /* Max number of physical dev per md dev */ | |
13282 | -#define MAX_MD_DEV 4 /* Max number of md dev */ | |
13283 | - | |
13284 | -#define FACTOR(a) ((a)->repartition & FACTOR_MASK) | |
13285 | -#define MAX_FAULT(a) (((a)->repartition & FAULT_MASK)>>8) | |
13286 | -#define PERSONALITY(a) ((a)->repartition & PERSONALITY_MASK) | |
13287 | - | |
13288 | -#define FACTOR_SHIFT(a) (PAGE_SHIFT + (a) - 10) | |
13289 | - | |
13290 | -struct real_dev | |
13291 | -{ | |
13292 | - kdev_t dev; /* Device number */ | |
13293 | - int size; /* Device size (in blocks) */ | |
13294 | - int offset; /* Real device offset (in blocks) in md dev | |
13295 | - (only used in linear mode) */ | |
13296 | - struct inode *inode; /* Lock inode */ | |
13297 | - md_superblock_t *sb; | |
13298 | - u32 sb_offset; | |
13299 | -}; | |
13300 | - | |
13301 | -struct md_dev; | |
13302 | - | |
13303 | -#define SPARE_INACTIVE 0 | |
13304 | -#define SPARE_WRITE 1 | |
13305 | -#define SPARE_ACTIVE 2 | |
13306 | - | |
13307 | -struct md_personality | |
13308 | -{ | |
13309 | - char *name; | |
13310 | - int (*map)(struct md_dev *mddev, kdev_t *rdev, | |
13311 | - unsigned long *rsector, unsigned long size); | |
13312 | - int (*make_request)(struct md_dev *mddev, int rw, struct buffer_head * bh); | |
13313 | - void (*end_request)(struct buffer_head * bh, int uptodate); | |
13314 | - int (*run)(int minor, struct md_dev *mddev); | |
13315 | - int (*stop)(int minor, struct md_dev *mddev); | |
13316 | - int (*status)(char *page, int minor, struct md_dev *mddev); | |
13317 | - int (*ioctl)(struct inode *inode, struct file *file, | |
13318 | - unsigned int cmd, unsigned long arg); | |
13319 | - int max_invalid_dev; | |
13320 | - int (*error_handler)(struct md_dev *mddev, kdev_t dev); | |
13321 | - | |
13322 | -/* | |
13323 | - * Some personalities (RAID-1, RAID-5) can get disks hot-added and | |
13324 | - * hot-removed. Hot removal is different from failure. (failure marks | |
13325 | - * a disk inactive, but the disk is still part of the array) | |
13326 | - */ | |
13327 | - int (*hot_add_disk) (struct md_dev *mddev, kdev_t dev); | |
13328 | - int (*hot_remove_disk) (struct md_dev *mddev, kdev_t dev); | |
13329 | - int (*mark_spare) (struct md_dev *mddev, md_descriptor_t *descriptor, int state); | |
13330 | -}; | |
13331 | - | |
13332 | -struct md_dev | |
13333 | -{ | |
13334 | - struct real_dev devices[MAX_REAL]; | |
13335 | - struct md_personality *pers; | |
13336 | - md_superblock_t *sb; | |
13337 | - int sb_dirty; | |
13338 | - int repartition; | |
13339 | - int busy; | |
13340 | - int nb_dev; | |
13341 | - void *private; | |
13342 | -}; | |
13343 | - | |
13344 | -struct md_thread { | |
13345 | - void (*run) (void *data); | |
13346 | - void *data; | |
13347 | - struct wait_queue *wqueue; | |
13348 | - unsigned long flags; | |
13349 | - struct semaphore *sem; | |
13350 | - struct task_struct *tsk; | |
13351 | -}; | |
13352 | - | |
13353 | -#define THREAD_WAKEUP 0 | |
13354 | - | |
13355 | -extern struct md_dev md_dev[MAX_MD_DEV]; | |
13356 | -extern int md_size[MAX_MD_DEV]; | |
13357 | -extern int md_maxreadahead[MAX_MD_DEV]; | |
13358 | - | |
13359 | -extern char *partition_name (kdev_t dev); | |
13360 | - | |
13361 | -extern int register_md_personality (int p_num, struct md_personality *p); | |
13362 | -extern int unregister_md_personality (int p_num); | |
13363 | -extern struct md_thread *md_register_thread (void (*run) (void *data), void *data); | |
13364 | -extern void md_unregister_thread (struct md_thread *thread); | |
13365 | -extern void md_wakeup_thread(struct md_thread *thread); | |
13366 | -extern int md_update_sb (int minor); | |
13367 | -extern int md_do_sync(struct md_dev *mddev); | |
13368 | - | |
13369 | -#endif __KERNEL__ | |
13370 | -#endif _MD_H | |
13371 | diff -ruN linux.orig/include/linux/raid/hsm.h linux-2.2.16/include/linux/raid/hsm.h | |
13372 | --- linux.orig/include/linux/raid/hsm.h Thu Jan 1 01:00:00 1970 | |
13373 | +++ linux-2.2.16/include/linux/raid/hsm.h Fri Jun 9 11:37:44 2000 | |
13374 | @@ -0,0 +1,65 @@ | |
13375 | +#ifndef _HSM_H | |
13376 | +#define _HSM_H | |
13377 | + | |
13378 | +#include <linux/raid/md.h> | |
13379 | + | |
13380 | +#if __alpha__ | |
13381 | +#error fix cpu_addr on Alpha first | |
13382 | +#endif | |
13383 | + | |
13384 | +#include <linux/raid/hsm_p.h> | |
13385 | + | |
13386 | +#define index_pv(lv,index) ((lv)->vg->pv_array+(index)->data.phys_nr) | |
13387 | +#define index_dev(lv,index) index_pv((lv),(index))->dev | |
13388 | +#define index_block(lv,index) (index)->data.phys_block | |
13389 | +#define index_child(index) ((lv_lptr_t *)((index)->cpu_addr)) | |
13390 | + | |
13391 | +#define ptr_to_cpuaddr(ptr) ((__u32) (ptr)) | |
13392 | + | |
13393 | + | |
13394 | +typedef struct pv_bg_desc_s { | |
13395 | + unsigned int free_blocks; | |
13396 | + pv_block_group_t *bg; | |
13397 | +} pv_bg_desc_t; | |
13398 | + | |
13399 | +typedef struct pv_s pv_t; | |
13400 | +typedef struct vg_s vg_t; | |
13401 | +typedef struct lv_s lv_t; | |
13402 | + | |
13403 | +struct pv_s | |
13404 | +{ | |
13405 | + int phys_nr; | |
13406 | + kdev_t dev; | |
13407 | + pv_sb_t *pv_sb; | |
13408 | + pv_bg_desc_t *bg_array; | |
13409 | +}; | |
13410 | + | |
13411 | +struct lv_s | |
13412 | +{ | |
13413 | + int log_id; | |
13414 | + vg_t *vg; | |
13415 | + | |
13416 | + unsigned int max_indices; | |
13417 | + unsigned int free_indices; | |
13418 | + lv_lptr_t root_index; | |
13419 | + | |
13420 | + kdev_t dev; | |
13421 | +}; | |
13422 | + | |
13423 | +struct vg_s | |
13424 | +{ | |
13425 | + int nr_pv; | |
13426 | + pv_t pv_array [MD_SB_DISKS]; | |
13427 | + | |
13428 | + int nr_lv; | |
13429 | + lv_t lv_array [HSM_MAX_LVS_PER_VG]; | |
13430 | + | |
13431 | + vg_sb_t *vg_sb; | |
13432 | + mddev_t *mddev; | |
13433 | +}; | |
13434 | + | |
13435 | +#define kdev_to_lv(dev) ((lv_t *) mddev_map[MINOR(dev)].data) | |
13436 | +#define mddev_to_vg(mddev) ((vg_t *) mddev->private) | |
13437 | + | |
13438 | +#endif | |
13439 | + | |
13440 | diff -ruN linux.orig/include/linux/raid/hsm_p.h linux-2.2.16/include/linux/raid/hsm_p.h | |
13441 | --- linux.orig/include/linux/raid/hsm_p.h Thu Jan 1 01:00:00 1970 | |
13442 | +++ linux-2.2.16/include/linux/raid/hsm_p.h Fri Jun 9 11:37:44 2000 | |
13443 | @@ -0,0 +1,237 @@ | |
13444 | +#ifndef _HSM_P_H | |
13445 | +#define _HSM_P_H | |
13446 | + | |
13447 | +#define HSM_BLOCKSIZE 4096 | |
13448 | +#define HSM_BLOCKSIZE_WORDS (HSM_BLOCKSIZE/4) | |
13449 | +#define PACKED __attribute__ ((packed)) | |
13450 | + | |
13451 | +/* | |
13452 | + * Identifies a block in physical space | |
13453 | + */ | |
13454 | +typedef struct phys_idx_s { | |
13455 | + __u16 phys_nr; | |
13456 | + __u32 phys_block; | |
13457 | + | |
13458 | +} PACKED phys_idx_t; | |
13459 | + | |
13460 | +/* | |
13461 | + * Identifies a block in logical space | |
13462 | + */ | |
13463 | +typedef struct log_idx_s { | |
13464 | + __u16 log_id; | |
13465 | + __u32 log_index; | |
13466 | + | |
13467 | +} PACKED log_idx_t; | |
13468 | + | |
13469 | +/* | |
13470 | + * Describes one PV | |
13471 | + */ | |
13472 | +#define HSM_PV_SB_MAGIC 0xf091ae9fU | |
13473 | + | |
13474 | +#define HSM_PV_SB_GENERIC_WORDS 32 | |
13475 | +#define HSM_PV_SB_RESERVED_WORDS \ | |
13476 | + (HSM_BLOCKSIZE_WORDS - HSM_PV_SB_GENERIC_WORDS) | |
13477 | + | |
13478 | +/* | |
13479 | + * On-disk PV identification data, on block 0 in any PV. | |
13480 | + */ | |
13481 | +typedef struct pv_sb_s | |
13482 | +{ | |
13483 | + __u32 pv_magic; /* 0 */ | |
13484 | + | |
13485 | + __u32 pv_uuid0; /* 1 */ | |
13486 | + __u32 pv_uuid1; /* 2 */ | |
13487 | + __u32 pv_uuid2; /* 3 */ | |
13488 | + __u32 pv_uuid3; /* 4 */ | |
13489 | + | |
13490 | + __u32 pv_major; /* 5 */ | |
13491 | + __u32 pv_minor; /* 6 */ | |
13492 | + __u32 pv_patch; /* 7 */ | |
13493 | + | |
13494 | + __u32 pv_ctime; /* 8 Creation time */ | |
13495 | + | |
13496 | + __u32 pv_total_size; /* 9 size of this PV, in blocks */ | |
13497 | + __u32 pv_first_free; /* 10 first free block */ | |
13498 | + __u32 pv_first_used; /* 11 first used block */ | |
13499 | + __u32 pv_blocks_left; /* 12 unallocated blocks */ | |
13500 | + __u32 pv_bg_size; /* 13 size of a block group, in blocks */ | |
13501 | + __u32 pv_block_size; /* 14 size of blocks, in bytes */ | |
13502 | + __u32 pv_pptr_size; /* 15 size of block descriptor, in bytes */ | |
13503 | + __u32 pv_block_groups; /* 16 number of block groups */ | |
13504 | + | |
13505 | + __u32 __reserved1[HSM_PV_SB_GENERIC_WORDS - 17]; | |
13506 | + | |
13507 | + /* | |
13508 | + * Reserved | |
13509 | + */ | |
13510 | + __u32 __reserved2[HSM_PV_SB_RESERVED_WORDS]; | |
13511 | + | |
13512 | +} PACKED pv_sb_t; | |
13513 | + | |
13514 | +/* | |
13515 | + * this is pretty much arbitrary, but has to be less than ~64 | |
13516 | + */ | |
13517 | +#define HSM_MAX_LVS_PER_VG 32 | |
13518 | + | |
13519 | +#define HSM_VG_SB_GENERIC_WORDS 32 | |
13520 | + | |
13521 | +#define LV_DESCRIPTOR_WORDS 8 | |
13522 | +#define HSM_VG_SB_RESERVED_WORDS (HSM_BLOCKSIZE_WORDS - \ | |
13523 | + LV_DESCRIPTOR_WORDS*HSM_MAX_LVS_PER_VG - HSM_VG_SB_GENERIC_WORDS) | |
13524 | + | |
13525 | +#if (HSM_PV_SB_RESERVED_WORDS < 0) | |
13526 | +#error you messed this one up dude ... | |
13527 | +#endif | |
13528 | + | |
13529 | +typedef struct lv_descriptor_s | |
13530 | +{ | |
13531 | + __u32 lv_id; /* 0 */ | |
13532 | + phys_idx_t lv_root_idx; /* 1 */ | |
13533 | + __u16 __reserved; /* 2 */ | |
13534 | + __u32 lv_max_indices; /* 3 */ | |
13535 | + __u32 lv_free_indices; /* 4 */ | |
13536 | + __u32 md_id; /* 5 */ | |
13537 | + | |
13538 | + __u32 reserved[LV_DESCRIPTOR_WORDS - 6]; | |
13539 | + | |
13540 | +} PACKED lv_descriptor_t; | |
13541 | + | |
13542 | +#define HSM_VG_SB_MAGIC 0x98320d7aU | |
13543 | +/* | |
13544 | + * On-disk VG identification data, in block 1 on all PVs | |
13545 | + */ | |
13546 | +typedef struct vg_sb_s | |
13547 | +{ | |
13548 | + __u32 vg_magic; /* 0 */ | |
13549 | + __u32 nr_lvs; /* 1 */ | |
13550 | + | |
13551 | + __u32 __reserved1[HSM_VG_SB_GENERIC_WORDS - 2]; | |
13552 | + | |
13553 | + lv_descriptor_t lv_array [HSM_MAX_LVS_PER_VG]; | |
13554 | + /* | |
13555 | + * Reserved | |
13556 | + */ | |
13557 | + __u32 __reserved2[HSM_VG_SB_RESERVED_WORDS]; | |
13558 | + | |
13559 | +} PACKED vg_sb_t; | |
13560 | + | |
13561 | +/* | |
13562 | + * Describes one LV | |
13563 | + */ | |
13564 | + | |
13565 | +#define HSM_LV_SB_MAGIC 0xe182bd8aU | |
13566 | + | |
13567 | +/* do we need lv_sb_t? */ | |
13568 | + | |
13569 | +typedef struct lv_sb_s | |
13570 | +{ | |
13571 | + /* | |
13572 | + * On-disk LV identifier | |
13573 | + */ | |
13574 | + __u32 lv_magic; /* 0 LV identifier */ | |
13575 | + __u32 lv_uuid0; /* 1 */ | |
13576 | + __u32 lv_uuid1; /* 2 */ | |
13577 | + __u32 lv_uuid2; /* 3 */ | |
13578 | + __u32 lv_uuid3; /* 4 */ | |
13579 | + | |
13580 | + __u32 lv_major; /* 5 PV identifier */ | |
13581 | + __u32 lv_minor; /* 6 PV identifier */ | |
13582 | + __u32 lv_patch; /* 7 PV identifier */ | |
13583 | + | |
13584 | + __u32 ctime; /* 8 Creation time */ | |
13585 | + __u32 size; /* 9 size of this LV, in blocks */ | |
13586 | + phys_idx_t start; /* 10 position of root index block */ | |
13587 | + log_idx_t first_free; /* 11-12 first free index */ | |
13588 | + | |
13589 | + /* | |
13590 | + * Reserved | |
13591 | + */ | |
13592 | + __u32 reserved[HSM_BLOCKSIZE_WORDS-13]; | |
13593 | + | |
13594 | +} PACKED lv_sb_t; | |
13595 | + | |
13596 | +/* | |
13597 | + * Pointer pointing from the physical space, points to | |
13598 | + * the LV owning this block. It also contains various | |
13599 | + * statistics about the physical block. | |
13600 | + */ | |
13601 | +typedef struct pv_pptr_s | |
13602 | +{ | |
13603 | + union { | |
13604 | + /* case 1 */ | |
13605 | + struct { | |
13606 | + log_idx_t owner; | |
13607 | + log_idx_t predicted; | |
13608 | + __u32 last_referenced; | |
13609 | + } used; | |
13610 | + /* case 2 */ | |
13611 | + struct { | |
13612 | + __u16 log_id; | |
13613 | + __u16 __unused1; | |
13614 | + __u32 next_free; | |
13615 | + __u32 __unused2; | |
13616 | + __u32 __unused3; | |
13617 | + } free; | |
13618 | + } u; | |
13619 | +} PACKED pv_pptr_t; | |
13620 | + | |
13621 | +static __inline__ int pv_pptr_free (const pv_pptr_t * pptr) | |
13622 | +{ | |
13623 | + return !pptr->u.free.log_id; | |
13624 | +} | |
13625 | + | |
13626 | + | |
13627 | +#define DATA_BLOCKS_PER_BG ((HSM_BLOCKSIZE*8)/(8*sizeof(pv_pptr_t)+1)) | |
13628 | + | |
13629 | +#define TOTAL_BLOCKS_PER_BG (DATA_BLOCKS_PER_BG+1) | |
13630 | +/* | |
13631 | + * A table of pointers filling up a single block, managing | |
13632 | + * the next DATA_BLOCKS_PER_BG physical blocks. Such block | |
13633 | + * groups form the physical space of blocks. | |
13634 | + */ | |
13635 | +typedef struct pv_block_group_s | |
13636 | +{ | |
13637 | + __u8 used_bitmap[(DATA_BLOCKS_PER_BG+7)/8]; | |
13638 | + | |
13639 | + pv_pptr_t blocks[DATA_BLOCKS_PER_BG]; | |
13640 | + | |
13641 | +} PACKED pv_block_group_t; | |
13642 | + | |
13643 | +/* | |
13644 | + * Pointer from the logical space, points to | |
13645 | + * the (PV,block) containing this logical block | |
13646 | + */ | |
13647 | +typedef struct lv_lptr_s | |
13648 | +{ | |
13649 | + phys_idx_t data; | |
13650 | + __u16 __reserved; | |
13651 | + __u32 cpu_addr; | |
13652 | + __u32 __reserved2; | |
13653 | + | |
13654 | +} PACKED lv_lptr_t; | |
13655 | + | |
13656 | +static __inline__ int index_free (const lv_lptr_t * index) | |
13657 | +{ | |
13658 | + return !index->data.phys_block; | |
13659 | +} | |
13660 | + | |
13661 | +static __inline__ int index_present (const lv_lptr_t * index) | |
13662 | +{ | |
13663 | + return index->cpu_addr; | |
13664 | +} | |
13665 | + | |
13666 | + | |
13667 | +#define HSM_LPTRS_PER_BLOCK (HSM_BLOCKSIZE/sizeof(lv_lptr_t)) | |
13668 | +/* | |
13669 | + * A table of pointers filling up a single block, managing | |
13670 | + * HSM_LPTRS_PER_BLOCK logical blocks. Such block groups form | |
13671 | + * the logical space of blocks. | |
13672 | + */ | |
13673 | +typedef struct lv_index_block_s | |
13674 | +{ | |
13675 | + lv_lptr_t blocks[HSM_LPTRS_PER_BLOCK]; | |
13676 | + | |
13677 | +} PACKED lv_index_block_t; | |
13678 | + | |
13679 | +#endif | |
13680 | + | |
13681 | diff -ruN linux.orig/include/linux/raid/linear.h linux-2.2.16/include/linux/raid/linear.h | |
13682 | --- linux.orig/include/linux/raid/linear.h Thu Jan 1 01:00:00 1970 | |
13683 | +++ linux-2.2.16/include/linux/raid/linear.h Fri Jun 9 11:37:44 2000 | |
13684 | @@ -0,0 +1,32 @@ | |
13685 | +#ifndef _LINEAR_H | |
13686 | +#define _LINEAR_H | |
13687 | + | |
13688 | +#include <linux/raid/md.h> | |
13689 | + | |
13690 | +struct dev_info { | |
13691 | + kdev_t dev; | |
13692 | + int size; | |
13693 | + unsigned int offset; | |
13694 | +}; | |
13695 | + | |
13696 | +typedef struct dev_info dev_info_t; | |
13697 | + | |
13698 | +struct linear_hash | |
13699 | +{ | |
13700 | + dev_info_t *dev0, *dev1; | |
13701 | +}; | |
13702 | + | |
13703 | +struct linear_private_data | |
13704 | +{ | |
13705 | + struct linear_hash *hash_table; | |
13706 | + dev_info_t disks[MD_SB_DISKS]; | |
13707 | + dev_info_t *smallest; | |
13708 | + int nr_zones; | |
13709 | +}; | |
13710 | + | |
13711 | + | |
13712 | +typedef struct linear_private_data linear_conf_t; | |
13713 | + | |
13714 | +#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) | |
13715 | + | |
13716 | +#endif | |
13717 | diff -ruN linux.orig/include/linux/raid/md.h linux-2.2.16/include/linux/raid/md.h | |
13718 | --- linux.orig/include/linux/raid/md.h Thu Jan 1 01:00:00 1970 | |
13719 | +++ linux-2.2.16/include/linux/raid/md.h Fri Jun 9 11:37:44 2000 | |
13720 | @@ -0,0 +1,96 @@ | |
13721 | +/* | |
13722 | + md.h : Multiple Devices driver for Linux | |
13723 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
13724 | + Copyright (C) 1994-96 Marc ZYNGIER | |
13725 | + <zyngier@ufr-info-p7.ibp.fr> or | |
13726 | + <maz@gloups.fdn.fr> | |
13727 | + | |
13728 | + This program is free software; you can redistribute it and/or modify | |
13729 | + it under the terms of the GNU General Public License as published by | |
13730 | + the Free Software Foundation; either version 2, or (at your option) | |
13731 | + any later version. | |
13732 | + | |
13733 | + You should have received a copy of the GNU General Public License | |
13734 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
13735 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
13736 | +*/ | |
13737 | + | |
13738 | +#ifndef _MD_H | |
13739 | +#define _MD_H | |
13740 | + | |
13741 | +#include <linux/mm.h> | |
13742 | +#include <linux/fs.h> | |
13743 | +#include <linux/blkdev.h> | |
13744 | +#include <asm/semaphore.h> | |
13745 | +#include <linux/major.h> | |
13746 | +#include <linux/ioctl.h> | |
13747 | +#include <linux/types.h> | |
13748 | +#include <asm/bitops.h> | |
13749 | +#include <linux/module.h> | |
13750 | +#include <linux/mm.h> | |
13751 | +#include <linux/hdreg.h> | |
13752 | +#include <linux/sysctl.h> | |
13753 | +#include <linux/fs.h> | |
13754 | +#include <linux/proc_fs.h> | |
13755 | +#include <linux/smp_lock.h> | |
13756 | +#include <linux/delay.h> | |
13757 | +#include <net/checksum.h> | |
13758 | +#include <linux/random.h> | |
13759 | +#include <linux/locks.h> | |
13760 | +#include <asm/io.h> | |
13761 | + | |
13762 | +#include <linux/raid/md_compatible.h> | |
13763 | +/* | |
13764 | + * 'md_p.h' holds the 'physical' layout of RAID devices | |
13765 | + * 'md_u.h' holds the user <=> kernel API | |
13766 | + * | |
13767 | + * 'md_k.h' holds kernel internal definitions | |
13768 | + */ | |
13769 | + | |
13770 | +#include <linux/raid/md_p.h> | |
13771 | +#include <linux/raid/md_u.h> | |
13772 | +#include <linux/raid/md_k.h> | |
13773 | + | |
13774 | +/* | |
13775 | + * Different major versions are not compatible. | |
13776 | + * Different minor versions are only downward compatible. | |
13777 | + * Different patchlevel versions are downward and upward compatible. | |
13778 | + */ | |
13779 | +#define MD_MAJOR_VERSION 0 | |
13780 | +#define MD_MINOR_VERSION 90 | |
13781 | +#define MD_PATCHLEVEL_VERSION 0 | |
13782 | + | |
13783 | +extern int md_size[MAX_MD_DEVS]; | |
13784 | +extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; | |
13785 | + | |
13786 | +extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data); | |
13787 | +extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev); | |
13788 | +extern char * partition_name (kdev_t dev); | |
13789 | +extern int register_md_personality (int p_num, mdk_personality_t *p); | |
13790 | +extern int unregister_md_personality (int p_num); | |
13791 | +extern mdk_thread_t * md_register_thread (void (*run) (void *data), | |
13792 | + void *data, const char *name); | |
13793 | +extern void md_unregister_thread (mdk_thread_t *thread); | |
13794 | +extern void md_wakeup_thread(mdk_thread_t *thread); | |
13795 | +extern void md_interrupt_thread (mdk_thread_t *thread); | |
13796 | +extern int md_update_sb (mddev_t *mddev); | |
13797 | +extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare); | |
13798 | +extern void md_recover_arrays (void); | |
13799 | +extern int md_check_ordering (mddev_t *mddev); | |
13800 | +extern void autodetect_raid(void); | |
13801 | +extern struct gendisk * find_gendisk (kdev_t dev); | |
13802 | +extern int md_notify_reboot(struct notifier_block *this, | |
13803 | + unsigned long code, void *x); | |
13804 | +#if CONFIG_BLK_DEV_MD | |
13805 | +extern void raid_setup(char *str,int *ints) md__init; | |
13806 | +#endif | |
13807 | +#ifdef CONFIG_MD_BOOT | |
13808 | +extern void md_setup(char *str,int *ints) md__init; | |
13809 | +#endif | |
13810 | + | |
13811 | +extern void md_print_devices (void); | |
13812 | + | |
13813 | +#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | |
13814 | + | |
13815 | +#endif _MD_H | |
13816 | + | |
13817 | diff -ruN linux.orig/include/linux/raid/md_compatible.h linux-2.2.16/include/linux/raid/md_compatible.h | |
13818 | --- linux.orig/include/linux/raid/md_compatible.h Thu Jan 1 01:00:00 1970 | |
13819 | +++ linux-2.2.16/include/linux/raid/md_compatible.h Fri Jun 9 11:37:44 2000 | |
13820 | @@ -0,0 +1,387 @@ | |
13821 | + | |
13822 | +/* | |
13823 | + md.h : Multiple Devices driver compatibility layer for Linux 2.0/2.2 | |
13824 | + Copyright (C) 1998 Ingo Molnar | |
13825 | + | |
13826 | + This program is free software; you can redistribute it and/or modify | |
13827 | + it under the terms of the GNU General Public License as published by | |
13828 | + the Free Software Foundation; either version 2, or (at your option) | |
13829 | + any later version. | |
13830 | + | |
13831 | + You should have received a copy of the GNU General Public License | |
13832 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
13833 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
13834 | +*/ | |
13835 | + | |
13836 | +#include <linux/version.h> | |
13837 | + | |
13838 | +#ifndef _MD_COMPATIBLE_H | |
13839 | +#define _MD_COMPATIBLE_H | |
13840 | + | |
13841 | +#define LinuxVersionCode(v, p, s) (((v)<<16)+((p)<<8)+(s)) | |
13842 | + | |
13843 | +#if LINUX_VERSION_CODE < LinuxVersionCode(2,1,0) | |
13844 | + | |
13845 | +/* 000 */ | |
13846 | +#define md__get_free_pages(x,y) __get_free_pages(x,y,GFP_KERNEL) | |
13847 | + | |
13848 | +#ifdef __i386__ | |
13849 | +/* 001 */ | |
13850 | +extern __inline__ int md_cpu_has_mmx(void) | |
13851 | +{ | |
13852 | + return x86_capability & 0x00800000; | |
13853 | +} | |
13854 | +#endif | |
13855 | + | |
13856 | +/* 002 */ | |
13857 | +#define md_clear_page(page) memset((void *)(page), 0, PAGE_SIZE) | |
13858 | + | |
13859 | +/* 003 */ | |
13860 | +/* | |
13861 | + * someone please suggest a sane compatibility layer for modules | |
13862 | + */ | |
13863 | +#define MD_EXPORT_SYMBOL(x) | |
13864 | + | |
13865 | +/* 004 */ | |
13866 | +static inline unsigned long | |
13867 | +md_copy_from_user(void *to, const void *from, unsigned long n) | |
13868 | +{ | |
13869 | + int err; | |
13870 | + | |
13871 | + err = verify_area(VERIFY_READ,from,n); | |
13872 | + if (!err) | |
13873 | + memcpy_fromfs(to, from, n); | |
13874 | + return err; | |
13875 | +} | |
13876 | + | |
13877 | +/* 005 */ | |
13878 | +extern inline unsigned long | |
13879 | +md_copy_to_user(void *to, const void *from, unsigned long n) | |
13880 | +{ | |
13881 | + int err; | |
13882 | + | |
13883 | + err = verify_area(VERIFY_WRITE,to,n); | |
13884 | + if (!err) | |
13885 | + memcpy_tofs(to, from, n); | |
13886 | + return err; | |
13887 | +} | |
13888 | + | |
13889 | +/* 006 */ | |
13890 | +#define md_put_user(x,ptr) \ | |
13891 | +({ \ | |
13892 | + int __err; \ | |
13893 | + \ | |
13894 | + __err = verify_area(VERIFY_WRITE,ptr,sizeof(*ptr)); \ | |
13895 | + if (!__err) \ | |
13896 | + put_user(x,ptr); \ | |
13897 | + __err; \ | |
13898 | +}) | |
13899 | + | |
13900 | +/* 007 */ | |
13901 | +extern inline int md_capable_admin(void) | |
13902 | +{ | |
13903 | + return suser(); | |
13904 | +} | |
13905 | + | |
13906 | +/* 008 */ | |
13907 | +#define MD_FILE_TO_INODE(file) ((file)->f_inode) | |
13908 | + | |
13909 | +/* 009 */ | |
13910 | +extern inline void md_flush_signals (void) | |
13911 | +{ | |
13912 | + current->signal = 0; | |
13913 | +} | |
13914 | + | |
13915 | +/* 010 */ | |
13916 | +#define __S(nr) (1<<((nr)-1)) | |
13917 | +extern inline void md_init_signals (void) | |
13918 | +{ | |
13919 | + current->exit_signal = SIGCHLD; | |
13920 | + current->blocked = ~(__S(SIGKILL)); | |
13921 | +} | |
13922 | +#undef __S | |
13923 | + | |
13924 | +/* 011 */ | |
13925 | +extern inline unsigned long md_signal_pending (struct task_struct * tsk) | |
13926 | +{ | |
13927 | + return (tsk->signal & ~tsk->blocked); | |
13928 | +} | |
13929 | + | |
13930 | +/* 012 */ | |
13931 | +#define md_set_global_readahead(x) read_ahead[MD_MAJOR] = MD_READAHEAD | |
13932 | + | |
13933 | +/* 013 */ | |
13934 | +#define md_mdelay(n) (\ | |
13935 | + {unsigned long msec=(n); while (msec--) udelay(1000);}) | |
13936 | + | |
13937 | +/* 014 */ | |
13938 | +#define MD_SYS_DOWN 0 | |
13939 | +#define MD_SYS_HALT 0 | |
13940 | +#define MD_SYS_POWER_OFF 0 | |
13941 | + | |
13942 | +/* 015 */ | |
13943 | +#define md_register_reboot_notifier(x) | |
13944 | + | |
13945 | +/* 016 */ | |
13946 | +extern __inline__ unsigned long | |
13947 | +md_test_and_set_bit(int nr, void * addr) | |
13948 | +{ | |
13949 | + unsigned long flags; | |
13950 | + unsigned long oldbit; | |
13951 | + | |
13952 | + save_flags(flags); | |
13953 | + cli(); | |
13954 | + oldbit = test_bit(nr,addr); | |
13955 | + set_bit(nr,addr); | |
13956 | + restore_flags(flags); | |
13957 | + return oldbit; | |
13958 | +} | |
13959 | + | |
13960 | +/* 017 */ | |
13961 | +extern __inline__ unsigned long | |
13962 | +md_test_and_clear_bit(int nr, void * addr) | |
13963 | +{ | |
13964 | + unsigned long flags; | |
13965 | + unsigned long oldbit; | |
13966 | + | |
13967 | + save_flags(flags); | |
13968 | + cli(); | |
13969 | + oldbit = test_bit(nr,addr); | |
13970 | + clear_bit(nr,addr); | |
13971 | + restore_flags(flags); | |
13972 | + return oldbit; | |
13973 | +} | |
13974 | + | |
13975 | +/* 018 */ | |
13976 | +#define md_atomic_read(x) (*(volatile int *)(x)) | |
13977 | +#define md_atomic_set(x,y) (*(volatile int *)(x) = (y)) | |
13978 | + | |
13979 | +/* 019 */ | |
13980 | +extern __inline__ void md_lock_kernel (void) | |
13981 | +{ | |
13982 | +#if __SMP__ | |
13983 | + lock_kernel(); | |
13984 | + syscall_count++; | |
13985 | +#endif | |
13986 | +} | |
13987 | + | |
13988 | +extern __inline__ void md_unlock_kernel (void) | |
13989 | +{ | |
13990 | +#if __SMP__ | |
13991 | + syscall_count--; | |
13992 | + unlock_kernel(); | |
13993 | +#endif | |
13994 | +} | |
13995 | +/* 020 */ | |
13996 | + | |
13997 | +#define md__init | |
13998 | +#define md__initdata | |
13999 | +#define md__initfunc(__arginit) __arginit | |
14000 | + | |
14001 | +/* 021 */ | |
14002 | + | |
14003 | +/* 022 */ | |
14004 | + | |
14005 | +struct md_list_head { | |
14006 | + struct md_list_head *next, *prev; | |
14007 | +}; | |
14008 | + | |
14009 | +#define MD_LIST_HEAD(name) \ | |
14010 | + struct md_list_head name = { &name, &name } | |
14011 | + | |
14012 | +#define MD_INIT_LIST_HEAD(ptr) do { \ | |
14013 | + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ | |
14014 | +} while (0) | |
14015 | + | |
14016 | +static __inline__ void md__list_add(struct md_list_head * new, | |
14017 | + struct md_list_head * prev, | |
14018 | + struct md_list_head * next) | |
14019 | +{ | |
14020 | + next->prev = new; | |
14021 | + new->next = next; | |
14022 | + new->prev = prev; | |
14023 | + prev->next = new; | |
14024 | +} | |
14025 | + | |
14026 | +static __inline__ void md_list_add(struct md_list_head *new, | |
14027 | + struct md_list_head *head) | |
14028 | +{ | |
14029 | + md__list_add(new, head, head->next); | |
14030 | +} | |
14031 | + | |
14032 | +static __inline__ void md__list_del(struct md_list_head * prev, | |
14033 | + struct md_list_head * next) | |
14034 | +{ | |
14035 | + next->prev = prev; | |
14036 | + prev->next = next; | |
14037 | +} | |
14038 | + | |
14039 | +static __inline__ void md_list_del(struct md_list_head *entry) | |
14040 | +{ | |
14041 | + md__list_del(entry->prev, entry->next); | |
14042 | +} | |
14043 | + | |
14044 | +static __inline__ int md_list_empty(struct md_list_head *head) | |
14045 | +{ | |
14046 | + return head->next == head; | |
14047 | +} | |
14048 | + | |
14049 | +#define md_list_entry(ptr, type, member) \ | |
14050 | + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) | |
14051 | + | |
14052 | +/* 023 */ | |
14053 | + | |
14054 | +static __inline__ signed long md_schedule_timeout(signed long timeout) | |
14055 | +{ | |
14056 | + current->timeout = jiffies + timeout; | |
14057 | + schedule(); | |
14058 | + return 0; | |
14059 | +} | |
14060 | + | |
14061 | +/* 024 */ | |
14062 | +#define md_need_resched(tsk) (need_resched) | |
14063 | + | |
14064 | +/* 025 */ | |
14065 | +typedef struct { int gcc_is_buggy; } md_spinlock_t; | |
14066 | +#define MD_SPIN_LOCK_UNLOCKED (md_spinlock_t) { 0 } | |
14067 | + | |
14068 | +#define md_spin_lock_irq cli | |
14069 | +#define md_spin_unlock_irq sti | |
14070 | +#define md_spin_unlock_irqrestore(x,flags) restore_flags(flags) | |
14071 | +#define md_spin_lock_irqsave(x,flags) do { save_flags(flags); cli(); } while (0) | |
14072 | + | |
14073 | +/* END */ | |
14074 | + | |
14075 | +#else | |
14076 | + | |
14077 | +#include <linux/reboot.h> | |
14078 | +#include <linux/vmalloc.h> | |
14079 | + | |
14080 | +/* 000 */ | |
14081 | +#define md__get_free_pages(x,y) __get_free_pages(x,y) | |
14082 | + | |
14083 | +#ifdef __i386__ | |
14084 | +/* 001 */ | |
14085 | +extern __inline__ int md_cpu_has_mmx(void) | |
14086 | +{ | |
14087 | + return boot_cpu_data.x86_capability & X86_FEATURE_MMX; | |
14088 | +} | |
14089 | +#endif | |
14090 | + | |
14091 | +/* 002 */ | |
14092 | +#define md_clear_page(page) clear_page(page) | |
14093 | + | |
14094 | +/* 003 */ | |
14095 | +#define MD_EXPORT_SYMBOL(x) EXPORT_SYMBOL(x) | |
14096 | + | |
14097 | +/* 004 */ | |
14098 | +#define md_copy_to_user(x,y,z) copy_to_user(x,y,z) | |
14099 | + | |
14100 | +/* 005 */ | |
14101 | +#define md_copy_from_user(x,y,z) copy_from_user(x,y,z) | |
14102 | + | |
14103 | +/* 006 */ | |
14104 | +#define md_put_user put_user | |
14105 | + | |
14106 | +/* 007 */ | |
14107 | +extern inline int md_capable_admin(void) | |
14108 | +{ | |
14109 | + return capable(CAP_SYS_ADMIN); | |
14110 | +} | |
14111 | + | |
14112 | +/* 008 */ | |
14113 | +#define MD_FILE_TO_INODE(file) ((file)->f_dentry->d_inode) | |
14114 | + | |
14115 | +/* 009 */ | |
14116 | +extern inline void md_flush_signals (void) | |
14117 | +{ | |
14118 | + spin_lock(¤t->sigmask_lock); | |
14119 | + flush_signals(current); | |
14120 | + spin_unlock(¤t->sigmask_lock); | |
14121 | +} | |
14122 | + | |
14123 | +/* 010 */ | |
14124 | +extern inline void md_init_signals (void) | |
14125 | +{ | |
14126 | + current->exit_signal = SIGCHLD; | |
14127 | + siginitsetinv(¤t->blocked, sigmask(SIGKILL)); | |
14128 | +} | |
14129 | + | |
14130 | +/* 011 */ | |
14131 | +#define md_signal_pending signal_pending | |
14132 | + | |
14133 | +/* 012 */ | |
14134 | +extern inline void md_set_global_readahead(int * table) | |
14135 | +{ | |
14136 | + max_readahead[MD_MAJOR] = table; | |
14137 | +} | |
14138 | + | |
14139 | +/* 013 */ | |
14140 | +#define md_mdelay(x) mdelay(x) | |
14141 | + | |
14142 | +/* 014 */ | |
14143 | +#define MD_SYS_DOWN SYS_DOWN | |
14144 | +#define MD_SYS_HALT SYS_HALT | |
14145 | +#define MD_SYS_POWER_OFF SYS_POWER_OFF | |
14146 | + | |
14147 | +/* 015 */ | |
14148 | +#define md_register_reboot_notifier register_reboot_notifier | |
14149 | + | |
14150 | +/* 016 */ | |
14151 | +#define md_test_and_set_bit test_and_set_bit | |
14152 | + | |
14153 | +/* 017 */ | |
14154 | +#define md_test_and_clear_bit test_and_clear_bit | |
14155 | + | |
14156 | +/* 018 */ | |
14157 | +#define md_atomic_read atomic_read | |
14158 | +#define md_atomic_set atomic_set | |
14159 | + | |
14160 | +/* 019 */ | |
14161 | +#define md_lock_kernel lock_kernel | |
14162 | +#define md_unlock_kernel unlock_kernel | |
14163 | + | |
14164 | +/* 020 */ | |
14165 | + | |
14166 | +#include <linux/init.h> | |
14167 | + | |
14168 | +#define md__init __init | |
14169 | +#define md__initdata __initdata | |
14170 | +#define md__initfunc(__arginit) __initfunc(__arginit) | |
14171 | + | |
14172 | +/* 021 */ | |
14173 | + | |
14174 | + | |
14175 | +/* 022 */ | |
14176 | + | |
14177 | +#define md_list_head list_head | |
14178 | +#define MD_LIST_HEAD(name) LIST_HEAD(name) | |
14179 | +#define MD_INIT_LIST_HEAD(ptr) INIT_LIST_HEAD(ptr) | |
14180 | +#define md_list_add list_add | |
14181 | +#define md_list_del list_del | |
14182 | +#define md_list_empty list_empty | |
14183 | + | |
14184 | +#define md_list_entry(ptr, type, member) list_entry(ptr, type, member) | |
14185 | + | |
14186 | +/* 023 */ | |
14187 | + | |
14188 | +#define md_schedule_timeout schedule_timeout | |
14189 | + | |
14190 | +/* 024 */ | |
14191 | +#define md_need_resched(tsk) ((tsk)->need_resched) | |
14192 | + | |
14193 | +/* 025 */ | |
14194 | +#define md_spinlock_t spinlock_t | |
14195 | +#define MD_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED | |
14196 | + | |
14197 | +#define md_spin_lock_irq spin_lock_irq | |
14198 | +#define md_spin_unlock_irq spin_unlock_irq | |
14199 | +#define md_spin_unlock_irqrestore spin_unlock_irqrestore | |
14200 | +#define md_spin_lock_irqsave spin_lock_irqsave | |
14201 | + | |
14202 | +/* END */ | |
14203 | + | |
14204 | +#endif | |
14205 | + | |
14206 | +#endif _MD_COMPATIBLE_H | |
14207 | + | |
14208 | diff -ruN linux.orig/include/linux/raid/md_k.h linux-2.2.16/include/linux/raid/md_k.h | |
14209 | --- linux.orig/include/linux/raid/md_k.h Thu Jan 1 01:00:00 1970 | |
14210 | +++ linux-2.2.16/include/linux/raid/md_k.h Fri Jun 9 11:37:44 2000 | |
14211 | @@ -0,0 +1,338 @@ | |
14212 | +/* | |
14213 | + md_k.h : kernel internal structure of the Linux MD driver | |
14214 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
14215 | + | |
14216 | + This program is free software; you can redistribute it and/or modify | |
14217 | + it under the terms of the GNU General Public License as published by | |
14218 | + the Free Software Foundation; either version 2, or (at your option) | |
14219 | + any later version. | |
14220 | + | |
14221 | + You should have received a copy of the GNU General Public License | |
14222 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
14223 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14224 | +*/ | |
14225 | + | |
14226 | +#ifndef _MD_K_H | |
14227 | +#define _MD_K_H | |
14228 | + | |
14229 | +#define MD_RESERVED 0UL | |
14230 | +#define LINEAR 1UL | |
14231 | +#define STRIPED 2UL | |
14232 | +#define RAID0 STRIPED | |
14233 | +#define RAID1 3UL | |
14234 | +#define RAID5 4UL | |
14235 | +#define TRANSLUCENT 5UL | |
14236 | +#define HSM 6UL | |
14237 | +#define MAX_PERSONALITY 7UL | |
14238 | + | |
14239 | +extern inline int pers_to_level (int pers) | |
14240 | +{ | |
14241 | + switch (pers) { | |
14242 | + case HSM: return -3; | |
14243 | + case TRANSLUCENT: return -2; | |
14244 | + case LINEAR: return -1; | |
14245 | + case RAID0: return 0; | |
14246 | + case RAID1: return 1; | |
14247 | + case RAID5: return 5; | |
14248 | + } | |
14249 | + panic("pers_to_level()"); | |
14250 | +} | |
14251 | + | |
14252 | +extern inline int level_to_pers (int level) | |
14253 | +{ | |
14254 | + switch (level) { | |
14255 | + case -3: return HSM; | |
14256 | + case -2: return TRANSLUCENT; | |
14257 | + case -1: return LINEAR; | |
14258 | + case 0: return RAID0; | |
14259 | + case 1: return RAID1; | |
14260 | + case 4: | |
14261 | + case 5: return RAID5; | |
14262 | + } | |
14263 | + return MD_RESERVED; | |
14264 | +} | |
14265 | + | |
14266 | +typedef struct mddev_s mddev_t; | |
14267 | +typedef struct mdk_rdev_s mdk_rdev_t; | |
14268 | + | |
14269 | +#if (MINORBITS != 8) | |
14270 | +#error MD doesnt handle bigger kdev yet | |
14271 | +#endif | |
14272 | + | |
14273 | +#define MAX_REAL 12 /* Max number of disks per md dev */ | |
14274 | +#define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */ | |
14275 | + | |
14276 | +/* | |
14277 | + * Maps a kdev to an mddev/subdev. How 'data' is handled is up to | |
14278 | + * the personality. (eg. HSM uses this to identify individual LVs) | |
14279 | + */ | |
14280 | +typedef struct dev_mapping_s { | |
14281 | + mddev_t *mddev; | |
14282 | + void *data; | |
14283 | +} dev_mapping_t; | |
14284 | + | |
14285 | +extern dev_mapping_t mddev_map [MAX_MD_DEVS]; | |
14286 | + | |
14287 | +extern inline mddev_t * kdev_to_mddev (kdev_t dev) | |
14288 | +{ | |
14289 | + return mddev_map[MINOR(dev)].mddev; | |
14290 | +} | |
14291 | + | |
14292 | +/* | |
14293 | + * options passed in raidrun: | |
14294 | + */ | |
14295 | + | |
14296 | +#define MAX_CHUNK_SIZE (4096*1024) | |
14297 | + | |
14298 | +/* | |
14299 | + * default readahead | |
14300 | + */ | |
14301 | +#define MD_READAHEAD (256 * 512) | |
14302 | + | |
14303 | +extern inline int disk_faulty(mdp_disk_t * d) | |
14304 | +{ | |
14305 | + return d->state & (1 << MD_DISK_FAULTY); | |
14306 | +} | |
14307 | + | |
14308 | +extern inline int disk_active(mdp_disk_t * d) | |
14309 | +{ | |
14310 | + return d->state & (1 << MD_DISK_ACTIVE); | |
14311 | +} | |
14312 | + | |
14313 | +extern inline int disk_sync(mdp_disk_t * d) | |
14314 | +{ | |
14315 | + return d->state & (1 << MD_DISK_SYNC); | |
14316 | +} | |
14317 | + | |
14318 | +extern inline int disk_spare(mdp_disk_t * d) | |
14319 | +{ | |
14320 | + return !disk_sync(d) && !disk_active(d) && !disk_faulty(d); | |
14321 | +} | |
14322 | + | |
14323 | +extern inline int disk_removed(mdp_disk_t * d) | |
14324 | +{ | |
14325 | + return d->state & (1 << MD_DISK_REMOVED); | |
14326 | +} | |
14327 | + | |
14328 | +extern inline void mark_disk_faulty(mdp_disk_t * d) | |
14329 | +{ | |
14330 | + d->state |= (1 << MD_DISK_FAULTY); | |
14331 | +} | |
14332 | + | |
14333 | +extern inline void mark_disk_active(mdp_disk_t * d) | |
14334 | +{ | |
14335 | + d->state |= (1 << MD_DISK_ACTIVE); | |
14336 | +} | |
14337 | + | |
14338 | +extern inline void mark_disk_sync(mdp_disk_t * d) | |
14339 | +{ | |
14340 | + d->state |= (1 << MD_DISK_SYNC); | |
14341 | +} | |
14342 | + | |
14343 | +extern inline void mark_disk_spare(mdp_disk_t * d) | |
14344 | +{ | |
14345 | + d->state = 0; | |
14346 | +} | |
14347 | + | |
14348 | +extern inline void mark_disk_removed(mdp_disk_t * d) | |
14349 | +{ | |
14350 | + d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED); | |
14351 | +} | |
14352 | + | |
14353 | +extern inline void mark_disk_inactive(mdp_disk_t * d) | |
14354 | +{ | |
14355 | + d->state &= ~(1 << MD_DISK_ACTIVE); | |
14356 | +} | |
14357 | + | |
14358 | +extern inline void mark_disk_nonsync(mdp_disk_t * d) | |
14359 | +{ | |
14360 | + d->state &= ~(1 << MD_DISK_SYNC); | |
14361 | +} | |
14362 | + | |
14363 | +/* | |
14364 | + * MD's 'extended' device | |
14365 | + */ | |
14366 | +struct mdk_rdev_s | |
14367 | +{ | |
14368 | + struct md_list_head same_set; /* RAID devices within the same set */ | |
14369 | + struct md_list_head all; /* all RAID devices */ | |
14370 | + struct md_list_head pending; /* undetected RAID devices */ | |
14371 | + | |
14372 | + kdev_t dev; /* Device number */ | |
14373 | + kdev_t old_dev; /* "" when it was last imported */ | |
14374 | + int size; /* Device size (in blocks) */ | |
14375 | + mddev_t *mddev; /* RAID array if running */ | |
14376 | + unsigned long last_events; /* IO event timestamp */ | |
14377 | + | |
14378 | + struct inode *inode; /* Lock inode */ | |
14379 | + struct file filp; /* Lock file */ | |
14380 | + | |
14381 | + mdp_super_t *sb; | |
14382 | + int sb_offset; | |
14383 | + | |
14384 | + int faulty; /* if faulty do not issue IO requests */ | |
14385 | + int desc_nr; /* descriptor index in the superblock */ | |
14386 | +}; | |
14387 | + | |
14388 | + | |
14389 | +/* | |
14390 | + * disk operations in a working array: | |
14391 | + */ | |
14392 | +#define DISKOP_SPARE_INACTIVE 0 | |
14393 | +#define DISKOP_SPARE_WRITE 1 | |
14394 | +#define DISKOP_SPARE_ACTIVE 2 | |
14395 | +#define DISKOP_HOT_REMOVE_DISK 3 | |
14396 | +#define DISKOP_HOT_ADD_DISK 4 | |
14397 | + | |
14398 | +typedef struct mdk_personality_s mdk_personality_t; | |
14399 | + | |
14400 | +struct mddev_s | |
14401 | +{ | |
14402 | + void *private; | |
14403 | + mdk_personality_t *pers; | |
14404 | + int __minor; | |
14405 | + mdp_super_t *sb; | |
14406 | + int nb_dev; | |
14407 | + struct md_list_head disks; | |
14408 | + int sb_dirty; | |
14409 | + mdu_param_t param; | |
14410 | + int ro; | |
14411 | + unsigned int curr_resync; | |
14412 | + unsigned long resync_start; | |
14413 | + char *name; | |
14414 | + int recovery_running; | |
14415 | + struct semaphore reconfig_sem; | |
14416 | + struct semaphore recovery_sem; | |
14417 | + struct semaphore resync_sem; | |
14418 | + struct md_list_head all_mddevs; | |
14419 | +}; | |
14420 | + | |
14421 | +struct mdk_personality_s | |
14422 | +{ | |
14423 | + char *name; | |
14424 | + int (*map)(mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
14425 | + unsigned long *rsector, unsigned long size); | |
14426 | + int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh); | |
14427 | + void (*end_request)(struct buffer_head * bh, int uptodate); | |
14428 | + int (*run)(mddev_t *mddev); | |
14429 | + int (*stop)(mddev_t *mddev); | |
14430 | + int (*status)(char *page, mddev_t *mddev); | |
14431 | + int (*ioctl)(struct inode *inode, struct file *file, | |
14432 | + unsigned int cmd, unsigned long arg); | |
14433 | + int max_invalid_dev; | |
14434 | + int (*error_handler)(mddev_t *mddev, kdev_t dev); | |
14435 | + | |
14436 | +/* | |
14437 | + * Some personalities (RAID-1, RAID-5) can have disks hot-added and | |
14438 | + * hot-removed. Hot removal is different from failure. (failure marks | |
14439 | + * a disk inactive, but the disk is still part of the array) The interface | |
14440 | + * to such operations is the 'pers->diskop()' function, can be NULL. | |
14441 | + * | |
14442 | + * the diskop function can change the pointer pointing to the incoming | |
14443 | + * descriptor, but must do so very carefully. (currently only | |
14444 | + * SPARE_ACTIVE expects such a change) | |
14445 | + */ | |
14446 | + int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state); | |
14447 | + | |
14448 | + int (*stop_resync)(mddev_t *mddev); | |
14449 | + int (*restart_resync)(mddev_t *mddev); | |
14450 | +}; | |
14451 | + | |
14452 | + | |
14453 | +/* | |
14454 | + * Currently we index md_array directly, based on the minor | |
14455 | + * number. This will have to change to dynamic allocation | |
14456 | + * once we start supporting partitioning of md devices. | |
14457 | + */ | |
14458 | +extern inline int mdidx (mddev_t * mddev) | |
14459 | +{ | |
14460 | + return mddev->__minor; | |
14461 | +} | |
14462 | + | |
14463 | +extern inline kdev_t mddev_to_kdev(mddev_t * mddev) | |
14464 | +{ | |
14465 | + return MKDEV(MD_MAJOR, mdidx(mddev)); | |
14466 | +} | |
14467 | + | |
14468 | +extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev); | |
14469 | +extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); | |
14470 | + | |
14471 | +/* | |
14472 | + * iterates through some rdev ringlist. It's safe to remove the | |
14473 | + * current 'rdev'. Dont touch 'tmp' though. | |
14474 | + */ | |
14475 | +#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp) \ | |
14476 | + \ | |
14477 | + for (tmp = head.next; \ | |
14478 | + rdev = md_list_entry(tmp, mdk_rdev_t, field), \ | |
14479 | + tmp = tmp->next, tmp->prev != &head \ | |
14480 | + ; ) | |
14481 | +/* | |
14482 | + * iterates through the 'same array disks' ringlist | |
14483 | + */ | |
14484 | +#define ITERATE_RDEV(mddev,rdev,tmp) \ | |
14485 | + ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp) | |
14486 | + | |
14487 | +/* | |
14488 | + * Same as above, but assumes that the device has rdev->desc_nr numbered | |
14489 | + * from 0 to mddev->nb_dev, and iterates through rdevs in ascending order. | |
14490 | + */ | |
14491 | +#define ITERATE_RDEV_ORDERED(mddev,rdev,i) \ | |
14492 | + for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++) | |
14493 | + | |
14494 | + | |
14495 | +/* | |
14496 | + * Iterates through all 'RAID managed disks' | |
14497 | + */ | |
14498 | +#define ITERATE_RDEV_ALL(rdev,tmp) \ | |
14499 | + ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp) | |
14500 | + | |
14501 | +/* | |
14502 | + * Iterates through 'pending RAID disks' | |
14503 | + */ | |
14504 | +#define ITERATE_RDEV_PENDING(rdev,tmp) \ | |
14505 | + ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp) | |
14506 | + | |
14507 | +/* | |
14508 | + * iterates through all used mddevs in the system. | |
14509 | + */ | |
14510 | +#define ITERATE_MDDEV(mddev,tmp) \ | |
14511 | + \ | |
14512 | + for (tmp = all_mddevs.next; \ | |
14513 | + mddev = md_list_entry(tmp, mddev_t, all_mddevs), \ | |
14514 | + tmp = tmp->next, tmp->prev != &all_mddevs \ | |
14515 | + ; ) | |
14516 | + | |
14517 | +extern inline int lock_mddev (mddev_t * mddev) | |
14518 | +{ | |
14519 | + return down_interruptible(&mddev->reconfig_sem); | |
14520 | +} | |
14521 | + | |
14522 | +extern inline void unlock_mddev (mddev_t * mddev) | |
14523 | +{ | |
14524 | + up(&mddev->reconfig_sem); | |
14525 | +} | |
14526 | + | |
14527 | +#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \ | |
14528 | + x = y; y = __tmp; } while (0) | |
14529 | + | |
14530 | +typedef struct mdk_thread_s { | |
14531 | + void (*run) (void *data); | |
14532 | + void *data; | |
14533 | + struct wait_queue *wqueue; | |
14534 | + unsigned long flags; | |
14535 | + struct semaphore *sem; | |
14536 | + struct task_struct *tsk; | |
14537 | + const char *name; | |
14538 | +} mdk_thread_t; | |
14539 | + | |
14540 | +#define THREAD_WAKEUP 0 | |
14541 | + | |
14542 | +typedef struct dev_name_s { | |
14543 | + struct md_list_head list; | |
14544 | + kdev_t dev; | |
14545 | + char name [MAX_DISKNAME_LEN]; | |
14546 | +} dev_name_t; | |
14547 | + | |
14548 | +#endif _MD_K_H | |
14549 | + | |
14550 | diff -ruN linux.orig/include/linux/raid/md_p.h linux-2.2.16/include/linux/raid/md_p.h | |
14551 | --- linux.orig/include/linux/raid/md_p.h Thu Jan 1 01:00:00 1970 | |
14552 | +++ linux-2.2.16/include/linux/raid/md_p.h Fri Jun 9 11:37:44 2000 | |
14553 | @@ -0,0 +1,161 @@ | |
14554 | +/* | |
14555 | + md_p.h : physical layout of Linux RAID devices | |
14556 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
14557 | + | |
14558 | + This program is free software; you can redistribute it and/or modify | |
14559 | + it under the terms of the GNU General Public License as published by | |
14560 | + the Free Software Foundation; either version 2, or (at your option) | |
14561 | + any later version. | |
14562 | + | |
14563 | + You should have received a copy of the GNU General Public License | |
14564 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
14565 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14566 | +*/ | |
14567 | + | |
14568 | +#ifndef _MD_P_H | |
14569 | +#define _MD_P_H | |
14570 | + | |
14571 | +/* | |
14572 | + * RAID superblock. | |
14573 | + * | |
14574 | + * The RAID superblock maintains some statistics on each RAID configuration. | |
14575 | + * Each real device in the RAID set contains it near the end of the device. | |
14576 | + * Some of the ideas are copied from the ext2fs implementation. | |
14577 | + * | |
14578 | + * We currently use 4096 bytes as follows: | |
14579 | + * | |
14580 | + * word offset function | |
14581 | + * | |
14582 | + * 0 - 31 Constant generic RAID device information. | |
14583 | + * 32 - 63 Generic state information. | |
14584 | + * 64 - 127 Personality specific information. | |
14585 | + * 128 - 511 12 32-words descriptors of the disks in the raid set. | |
14586 | + * 512 - 911 Reserved. | |
14587 | + * 912 - 1023 Disk specific descriptor. | |
14588 | + */ | |
14589 | + | |
14590 | +/* | |
14591 | + * If x is the real device size in bytes, we return an apparent size of: | |
14592 | + * | |
14593 | + * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES | |
14594 | + * | |
14595 | + * and place the 4kB superblock at offset y. | |
14596 | + */ | |
14597 | +#define MD_RESERVED_BYTES (64 * 1024) | |
14598 | +#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) | |
14599 | +#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE) | |
14600 | + | |
14601 | +#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) | |
14602 | +#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS) | |
14603 | + | |
14604 | +#define MD_SB_BYTES 4096 | |
14605 | +#define MD_SB_WORDS (MD_SB_BYTES / 4) | |
14606 | +#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE) | |
14607 | +#define MD_SB_SECTORS (MD_SB_BYTES / 512) | |
14608 | + | |
14609 | +/* | |
14610 | + * The following are counted in 32-bit words | |
14611 | + */ | |
14612 | +#define MD_SB_GENERIC_OFFSET 0 | |
14613 | +#define MD_SB_PERSONALITY_OFFSET 64 | |
14614 | +#define MD_SB_DISKS_OFFSET 128 | |
14615 | +#define MD_SB_DESCRIPTOR_OFFSET 992 | |
14616 | + | |
14617 | +#define MD_SB_GENERIC_CONSTANT_WORDS 32 | |
14618 | +#define MD_SB_GENERIC_STATE_WORDS 32 | |
14619 | +#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) | |
14620 | +#define MD_SB_PERSONALITY_WORDS 64 | |
14621 | +#define MD_SB_DISKS_WORDS 384 | |
14622 | +#define MD_SB_DESCRIPTOR_WORDS 32 | |
14623 | +#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) | |
14624 | +#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) | |
14625 | +#define MD_SB_DISKS (MD_SB_DISKS_WORDS / MD_SB_DESCRIPTOR_WORDS) | |
14626 | + | |
14627 | +/* | |
14628 | + * Device "operational" state bits | |
14629 | + */ | |
14630 | +#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ | |
14631 | +#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ | |
14632 | +#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ | |
14633 | +#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ | |
14634 | + | |
14635 | +typedef struct mdp_device_descriptor_s { | |
14636 | + __u32 number; /* 0 Device number in the entire set */ | |
14637 | + __u32 major; /* 1 Device major number */ | |
14638 | + __u32 minor; /* 2 Device minor number */ | |
14639 | + __u32 raid_disk; /* 3 The role of the device in the raid set */ | |
14640 | + __u32 state; /* 4 Operational state */ | |
14641 | + __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; | |
14642 | +} mdp_disk_t; | |
14643 | + | |
14644 | +#define MD_SB_MAGIC 0xa92b4efc | |
14645 | + | |
14646 | +/* | |
14647 | + * Superblock state bits | |
14648 | + */ | |
14649 | +#define MD_SB_CLEAN 0 | |
14650 | +#define MD_SB_ERRORS 1 | |
14651 | + | |
14652 | +typedef struct mdp_superblock_s { | |
14653 | + /* | |
14654 | + * Constant generic information | |
14655 | + */ | |
14656 | + __u32 md_magic; /* 0 MD identifier */ | |
14657 | + __u32 major_version; /* 1 major version to which the set conforms */ | |
14658 | + __u32 minor_version; /* 2 minor version ... */ | |
14659 | + __u32 patch_version; /* 3 patchlevel version ... */ | |
14660 | + __u32 gvalid_words; /* 4 Number of used words in this section */ | |
14661 | + __u32 set_uuid0; /* 5 Raid set identifier */ | |
14662 | + __u32 ctime; /* 6 Creation time */ | |
14663 | + __u32 level; /* 7 Raid personality */ | |
14664 | + __u32 size; /* 8 Apparent size of each individual disk */ | |
14665 | + __u32 nr_disks; /* 9 total disks in the raid set */ | |
14666 | + __u32 raid_disks; /* 10 disks in a fully functional raid set */ | |
14667 | + __u32 md_minor; /* 11 preferred MD minor device number */ | |
14668 | + __u32 not_persistent; /* 12 does it have a persistent superblock */ | |
14669 | + __u32 set_uuid1; /* 13 Raid set identifier #2 */ | |
14670 | + __u32 set_uuid2; /* 14 Raid set identifier #3 */ | |
14671 | + __u32 set_uuid3; /* 14 Raid set identifier #4 */ | |
14672 | + __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; | |
14673 | + | |
14674 | + /* | |
14675 | + * Generic state information | |
14676 | + */ | |
14677 | + __u32 utime; /* 0 Superblock update time */ | |
14678 | + __u32 state; /* 1 State bits (clean, ...) */ | |
14679 | + __u32 active_disks; /* 2 Number of currently active disks */ | |
14680 | + __u32 working_disks; /* 3 Number of working disks */ | |
14681 | + __u32 failed_disks; /* 4 Number of failed disks */ | |
14682 | + __u32 spare_disks; /* 5 Number of spare disks */ | |
14683 | + __u32 sb_csum; /* 6 checksum of the whole superblock */ | |
14684 | + __u64 events; /* 7 number of superblock updates (64-bit!) */ | |
14685 | + __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9]; | |
14686 | + | |
14687 | + /* | |
14688 | + * Personality information | |
14689 | + */ | |
14690 | + __u32 layout; /* 0 the array's physical layout */ | |
14691 | + __u32 chunk_size; /* 1 chunk size in bytes */ | |
14692 | + __u32 root_pv; /* 2 LV root PV */ | |
14693 | + __u32 root_block; /* 3 LV root block */ | |
14694 | + __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; | |
14695 | + | |
14696 | + /* | |
14697 | + * Disks information | |
14698 | + */ | |
14699 | + mdp_disk_t disks[MD_SB_DISKS]; | |
14700 | + | |
14701 | + /* | |
14702 | + * Reserved | |
14703 | + */ | |
14704 | + __u32 reserved[MD_SB_RESERVED_WORDS]; | |
14705 | + | |
14706 | + /* | |
14707 | + * Active descriptor | |
14708 | + */ | |
14709 | + mdp_disk_t this_disk; | |
14710 | + | |
14711 | +} mdp_super_t; | |
14712 | + | |
14713 | +#endif _MD_P_H | |
14714 | + | |
14715 | diff -ruN linux.orig/include/linux/raid/md_u.h linux-2.2.16/include/linux/raid/md_u.h | |
14716 | --- linux.orig/include/linux/raid/md_u.h Thu Jan 1 01:00:00 1970 | |
14717 | +++ linux-2.2.16/include/linux/raid/md_u.h Fri Jun 9 11:37:44 2000 | |
14718 | @@ -0,0 +1,115 @@ | |
14719 | +/* | |
14720 | + md_u.h : user <=> kernel API between Linux raidtools and RAID drivers | |
14721 | + Copyright (C) 1998 Ingo Molnar | |
14722 | + | |
14723 | + This program is free software; you can redistribute it and/or modify | |
14724 | + it under the terms of the GNU General Public License as published by | |
14725 | + the Free Software Foundation; either version 2, or (at your option) | |
14726 | + any later version. | |
14727 | + | |
14728 | + You should have received a copy of the GNU General Public License | |
14729 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
14730 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14731 | +*/ | |
14732 | + | |
14733 | +#ifndef _MD_U_H | |
14734 | +#define _MD_U_H | |
14735 | + | |
14736 | +/* ioctls */ | |
14737 | + | |
14738 | +/* status */ | |
14739 | +#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) | |
14740 | +#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) | |
14741 | +#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) | |
14742 | +#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) | |
14743 | + | |
14744 | +/* configuration */ | |
14745 | +#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) | |
14746 | +#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) | |
14747 | +#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) | |
14748 | +#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) | |
14749 | +#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) | |
14750 | +#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) | |
14751 | +#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) | |
14752 | +#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) | |
14753 | +#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) | |
14754 | +#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) | |
14755 | + | |
14756 | +/* usage */ | |
14757 | +#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) | |
14758 | +#define START_ARRAY _IO (MD_MAJOR, 0x31) | |
14759 | +#define STOP_ARRAY _IO (MD_MAJOR, 0x32) | |
14760 | +#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) | |
14761 | +#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) | |
14762 | + | |
14763 | +typedef struct mdu_version_s { | |
14764 | + int major; | |
14765 | + int minor; | |
14766 | + int patchlevel; | |
14767 | +} mdu_version_t; | |
14768 | + | |
14769 | +typedef struct mdu_array_info_s { | |
14770 | + /* | |
14771 | + * Generic constant information | |
14772 | + */ | |
14773 | + int major_version; | |
14774 | + int minor_version; | |
14775 | + int patch_version; | |
14776 | + int ctime; | |
14777 | + int level; | |
14778 | + int size; | |
14779 | + int nr_disks; | |
14780 | + int raid_disks; | |
14781 | + int md_minor; | |
14782 | + int not_persistent; | |
14783 | + | |
14784 | + /* | |
14785 | + * Generic state information | |
14786 | + */ | |
14787 | + int utime; /* 0 Superblock update time */ | |
14788 | + int state; /* 1 State bits (clean, ...) */ | |
14789 | + int active_disks; /* 2 Number of currently active disks */ | |
14790 | + int working_disks; /* 3 Number of working disks */ | |
14791 | + int failed_disks; /* 4 Number of failed disks */ | |
14792 | + int spare_disks; /* 5 Number of spare disks */ | |
14793 | + | |
14794 | + /* | |
14795 | + * Personality information | |
14796 | + */ | |
14797 | + int layout; /* 0 the array's physical layout */ | |
14798 | + int chunk_size; /* 1 chunk size in bytes */ | |
14799 | + | |
14800 | +} mdu_array_info_t; | |
14801 | + | |
14802 | +typedef struct mdu_disk_info_s { | |
14803 | + /* | |
14804 | + * configuration/status of one particular disk | |
14805 | + */ | |
14806 | + int number; | |
14807 | + int major; | |
14808 | + int minor; | |
14809 | + int raid_disk; | |
14810 | + int state; | |
14811 | + | |
14812 | +} mdu_disk_info_t; | |
14813 | + | |
14814 | +typedef struct mdu_start_info_s { | |
14815 | + /* | |
14816 | + * configuration/status of one particular disk | |
14817 | + */ | |
14818 | + int major; | |
14819 | + int minor; | |
14820 | + int raid_disk; | |
14821 | + int state; | |
14822 | + | |
14823 | +} mdu_start_info_t; | |
14824 | + | |
14825 | +typedef struct mdu_param_s | |
14826 | +{ | |
14827 | + int personality; /* 1,2,3,4 */ | |
14828 | + int chunk_size; /* in bytes */ | |
14829 | + int max_fault; /* unused for now */ | |
14830 | +} mdu_param_t; | |
14831 | + | |
14832 | +#endif _MD_U_H | |
14833 | + | |
14834 | diff -ruN linux.orig/include/linux/raid/raid0.h linux-2.2.16/include/linux/raid/raid0.h | |
14835 | --- linux.orig/include/linux/raid/raid0.h Thu Jan 1 01:00:00 1970 | |
14836 | +++ linux-2.2.16/include/linux/raid/raid0.h Fri Jun 9 11:37:44 2000 | |
14837 | @@ -0,0 +1,33 @@ | |
14838 | +#ifndef _RAID0_H | |
14839 | +#define _RAID0_H | |
14840 | + | |
14841 | +#include <linux/raid/md.h> | |
14842 | + | |
14843 | +struct strip_zone | |
14844 | +{ | |
14845 | + int zone_offset; /* Zone offset in md_dev */ | |
14846 | + int dev_offset; /* Zone offset in real dev */ | |
14847 | + int size; /* Zone size */ | |
14848 | + int nb_dev; /* # of devices attached to the zone */ | |
14849 | + mdk_rdev_t *dev[MAX_REAL]; /* Devices attached to the zone */ | |
14850 | +}; | |
14851 | + | |
14852 | +struct raid0_hash | |
14853 | +{ | |
14854 | + struct strip_zone *zone0, *zone1; | |
14855 | +}; | |
14856 | + | |
14857 | +struct raid0_private_data | |
14858 | +{ | |
14859 | + struct raid0_hash *hash_table; /* Dynamically allocated */ | |
14860 | + struct strip_zone *strip_zone; /* This one too */ | |
14861 | + int nr_strip_zones; | |
14862 | + struct strip_zone *smallest; | |
14863 | + int nr_zones; | |
14864 | +}; | |
14865 | + | |
14866 | +typedef struct raid0_private_data raid0_conf_t; | |
14867 | + | |
14868 | +#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) | |
14869 | + | |
14870 | +#endif | |
14871 | diff -ruN linux.orig/include/linux/raid/raid1.h linux-2.2.16/include/linux/raid/raid1.h | |
14872 | --- linux.orig/include/linux/raid/raid1.h Thu Jan 1 01:00:00 1970 | |
14873 | +++ linux-2.2.16/include/linux/raid/raid1.h Fri Jun 9 11:37:44 2000 | |
14874 | @@ -0,0 +1,64 @@ | |
14875 | +#ifndef _RAID1_H | |
14876 | +#define _RAID1_H | |
14877 | + | |
14878 | +#include <linux/raid/md.h> | |
14879 | + | |
14880 | +struct mirror_info { | |
14881 | + int number; | |
14882 | + int raid_disk; | |
14883 | + kdev_t dev; | |
14884 | + int next; | |
14885 | + int sect_limit; | |
14886 | + | |
14887 | + /* | |
14888 | + * State bits: | |
14889 | + */ | |
14890 | + int operational; | |
14891 | + int write_only; | |
14892 | + int spare; | |
14893 | + | |
14894 | + int used_slot; | |
14895 | +}; | |
14896 | + | |
14897 | +struct raid1_private_data { | |
14898 | + mddev_t *mddev; | |
14899 | + struct mirror_info mirrors[MD_SB_DISKS]; | |
14900 | + int nr_disks; | |
14901 | + int raid_disks; | |
14902 | + int working_disks; | |
14903 | + int last_used; | |
14904 | + unsigned long next_sect; | |
14905 | + int sect_count; | |
14906 | + mdk_thread_t *thread, *resync_thread; | |
14907 | + int resync_mirrors; | |
14908 | + struct mirror_info *spare; | |
14909 | +}; | |
14910 | + | |
14911 | +typedef struct raid1_private_data raid1_conf_t; | |
14912 | + | |
14913 | +/* | |
14914 | + * this is the only point in the RAID code where we violate | |
14915 | + * C type safety. mddev->private is an 'opaque' pointer. | |
14916 | + */ | |
14917 | +#define mddev_to_conf(mddev) ((raid1_conf_t *) mddev->private) | |
14918 | + | |
14919 | +/* | |
14920 | + * this is our 'private' 'collective' RAID1 buffer head. | |
14921 | + * it contains information about what kind of IO operations were started | |
14922 | + * for this RAID1 operation, and about their status: | |
14923 | + */ | |
14924 | + | |
14925 | +struct raid1_bh { | |
14926 | + atomic_t remaining; /* 'have we finished' count, | |
14927 | + * used from IRQ handlers | |
14928 | + */ | |
14929 | + int cmd; | |
14930 | + unsigned long state; | |
14931 | + mddev_t *mddev; | |
14932 | + struct buffer_head *master_bh; | |
14933 | + struct buffer_head *mirror_bh [MD_SB_DISKS]; | |
14934 | + struct buffer_head bh_req; | |
14935 | + struct buffer_head *next_retry; | |
14936 | +}; | |
14937 | + | |
14938 | +#endif | |
14939 | diff -ruN linux.orig/include/linux/raid/raid5.h linux-2.2.16/include/linux/raid/raid5.h | |
14940 | --- linux.orig/include/linux/raid/raid5.h Thu Jan 1 01:00:00 1970 | |
14941 | +++ linux-2.2.16/include/linux/raid/raid5.h Fri Jun 9 11:37:44 2000 | |
14942 | @@ -0,0 +1,113 @@ | |
14943 | +#ifndef _RAID5_H | |
14944 | +#define _RAID5_H | |
14945 | + | |
14946 | +#include <linux/raid/md.h> | |
14947 | +#include <linux/raid/xor.h> | |
14948 | + | |
14949 | +struct disk_info { | |
14950 | + kdev_t dev; | |
14951 | + int operational; | |
14952 | + int number; | |
14953 | + int raid_disk; | |
14954 | + int write_only; | |
14955 | + int spare; | |
14956 | + int used_slot; | |
14957 | +}; | |
14958 | + | |
14959 | +struct stripe_head { | |
14960 | + md_spinlock_t stripe_lock; | |
14961 | + struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ | |
14962 | + struct stripe_head *free_next; /* pool of free sh's */ | |
14963 | + struct buffer_head *buffer_pool; /* pool of free buffers */ | |
14964 | + struct buffer_head *bh_pool; /* pool of free bh's */ | |
14965 | + struct raid5_private_data *raid_conf; | |
14966 | + struct buffer_head *bh_old[MD_SB_DISKS]; /* disk image */ | |
14967 | + struct buffer_head *bh_new[MD_SB_DISKS]; /* buffers of the MD device (present in buffer cache) */ | |
14968 | + struct buffer_head *bh_copy[MD_SB_DISKS]; /* copy on write of bh_new (bh_new can change from under us) */ | |
14969 | + struct buffer_head *bh_req[MD_SB_DISKS]; /* copy of bh_new (only the buffer heads), queued to the lower levels */ | |
14970 | + int cmd_new[MD_SB_DISKS]; /* READ/WRITE for new */ | |
14971 | + int new[MD_SB_DISKS]; /* buffer added since the last handle_stripe() */ | |
14972 | + unsigned long sector; /* sector of this row */ | |
14973 | + int size; /* buffers size */ | |
14974 | + int pd_idx; /* parity disk index */ | |
14975 | + atomic_t nr_pending; /* nr of pending cmds */ | |
14976 | + unsigned long state; /* state flags */ | |
14977 | + int cmd; /* stripe cmd */ | |
14978 | + int count; /* nr of waiters */ | |
14979 | + int write_method; /* reconstruct-write / read-modify-write */ | |
14980 | + int phase; /* PHASE_BEGIN, ..., PHASE_COMPLETE */ | |
14981 | + struct wait_queue *wait; /* processes waiting for this stripe */ | |
14982 | +}; | |
14983 | + | |
14984 | +/* | |
14985 | + * Phase | |
14986 | + */ | |
14987 | +#define PHASE_BEGIN 0 | |
14988 | +#define PHASE_READ_OLD 1 | |
14989 | +#define PHASE_WRITE 2 | |
14990 | +#define PHASE_READ 3 | |
14991 | +#define PHASE_COMPLETE 4 | |
14992 | + | |
14993 | +/* | |
14994 | + * Write method | |
14995 | + */ | |
14996 | +#define METHOD_NONE 0 | |
14997 | +#define RECONSTRUCT_WRITE 1 | |
14998 | +#define READ_MODIFY_WRITE 2 | |
14999 | + | |
15000 | +/* | |
15001 | + * Stripe state | |
15002 | + */ | |
15003 | +#define STRIPE_LOCKED 0 | |
15004 | +#define STRIPE_ERROR 1 | |
15005 | + | |
15006 | +/* | |
15007 | + * Stripe commands | |
15008 | + */ | |
15009 | +#define STRIPE_NONE 0 | |
15010 | +#define STRIPE_WRITE 1 | |
15011 | +#define STRIPE_READ 2 | |
15012 | + | |
15013 | +struct raid5_private_data { | |
15014 | + struct stripe_head **stripe_hashtbl; | |
15015 | + mddev_t *mddev; | |
15016 | + mdk_thread_t *thread, *resync_thread; | |
15017 | + struct disk_info disks[MD_SB_DISKS]; | |
15018 | + struct disk_info *spare; | |
15019 | + int buffer_size; | |
15020 | + int chunk_size, level, algorithm; | |
15021 | + int raid_disks, working_disks, failed_disks; | |
15022 | + int sector_count; | |
15023 | + unsigned long next_sector; | |
15024 | + atomic_t nr_handle; | |
15025 | + struct stripe_head *next_free_stripe; | |
15026 | + int nr_stripes; | |
15027 | + int resync_parity; | |
15028 | + int max_nr_stripes; | |
15029 | + int clock; | |
15030 | + int nr_hashed_stripes; | |
15031 | + int nr_locked_stripes; | |
15032 | + int nr_pending_stripes; | |
15033 | + int nr_cached_stripes; | |
15034 | + | |
15035 | + /* | |
15036 | + * Free stripes pool | |
15037 | + */ | |
15038 | + int nr_free_sh; | |
15039 | + struct stripe_head *free_sh_list; | |
15040 | + struct wait_queue *wait_for_stripe; | |
15041 | +}; | |
15042 | + | |
15043 | +typedef struct raid5_private_data raid5_conf_t; | |
15044 | + | |
15045 | +#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) | |
15046 | + | |
15047 | +/* | |
15048 | + * Our supported algorithms | |
15049 | + */ | |
15050 | +#define ALGORITHM_LEFT_ASYMMETRIC 0 | |
15051 | +#define ALGORITHM_RIGHT_ASYMMETRIC 1 | |
15052 | +#define ALGORITHM_LEFT_SYMMETRIC 2 | |
15053 | +#define ALGORITHM_RIGHT_SYMMETRIC 3 | |
15054 | + | |
15055 | +#endif | |
15056 | diff -ruN linux.orig/include/linux/raid/translucent.h linux-2.2.16/include/linux/raid/translucent.h | |
15057 | --- linux.orig/include/linux/raid/translucent.h Thu Jan 1 01:00:00 1970 | |
15058 | +++ linux-2.2.16/include/linux/raid/translucent.h Fri Jun 9 11:37:44 2000 | |
15059 | @@ -0,0 +1,23 @@ | |
15060 | +#ifndef _TRANSLUCENT_H | |
15061 | +#define _TRANSLUCENT_H | |
15062 | + | |
15063 | +#include <linux/raid/md.h> | |
15064 | + | |
15065 | +typedef struct dev_info dev_info_t; | |
15066 | + | |
15067 | +struct dev_info { | |
15068 | + kdev_t dev; | |
15069 | + int size; | |
15070 | +}; | |
15071 | + | |
15072 | +struct translucent_private_data | |
15073 | +{ | |
15074 | + dev_info_t disks[MD_SB_DISKS]; | |
15075 | +}; | |
15076 | + | |
15077 | + | |
15078 | +typedef struct translucent_private_data translucent_conf_t; | |
15079 | + | |
15080 | +#define mddev_to_conf(mddev) ((translucent_conf_t *) mddev->private) | |
15081 | + | |
15082 | +#endif | |
15083 | diff -ruN linux.orig/include/linux/raid/xor.h linux-2.2.16/include/linux/raid/xor.h | |
15084 | --- linux.orig/include/linux/raid/xor.h Thu Jan 1 01:00:00 1970 | |
15085 | +++ linux-2.2.16/include/linux/raid/xor.h Fri Jun 9 11:37:44 2000 | |
15086 | @@ -0,0 +1,12 @@ | |
15087 | +#ifndef _XOR_H | |
15088 | +#define _XOR_H | |
15089 | + | |
15090 | +#include <linux/raid/md.h> | |
15091 | + | |
15092 | +#define MAX_XOR_BLOCKS 5 | |
15093 | + | |
15094 | +extern void calibrate_xor_block(void); | |
15095 | +extern void (*xor_block)(unsigned int count, | |
15096 | + struct buffer_head **bh_ptr); | |
15097 | + | |
15098 | +#endif | |
15099 | diff -ruN linux.orig/include/linux/raid0.h linux-2.2.16/include/linux/raid0.h | |
15100 | --- linux.orig/include/linux/raid0.h Tue Oct 29 14:20:24 1996 | |
15101 | +++ linux-2.2.16/include/linux/raid0.h Thu Jan 1 01:00:00 1970 | |
15102 | @@ -1,27 +0,0 @@ | |
15103 | -#ifndef _RAID0_H | |
15104 | -#define _RAID0_H | |
15105 | - | |
15106 | -struct strip_zone | |
15107 | -{ | |
15108 | - int zone_offset; /* Zone offset in md_dev */ | |
15109 | - int dev_offset; /* Zone offset in real dev */ | |
15110 | - int size; /* Zone size */ | |
15111 | - int nb_dev; /* Number of devices attached to the zone */ | |
15112 | - struct real_dev *dev[MAX_REAL]; /* Devices attached to the zone */ | |
15113 | -}; | |
15114 | - | |
15115 | -struct raid0_hash | |
15116 | -{ | |
15117 | - struct strip_zone *zone0, *zone1; | |
15118 | -}; | |
15119 | - | |
15120 | -struct raid0_data | |
15121 | -{ | |
15122 | - struct raid0_hash *hash_table; /* Dynamically allocated */ | |
15123 | - struct strip_zone *strip_zone; /* This one too */ | |
15124 | - int nr_strip_zones; | |
15125 | - struct strip_zone *smallest; | |
15126 | - int nr_zones; | |
15127 | -}; | |
15128 | - | |
15129 | -#endif | |
15130 | diff -ruN linux.orig/include/linux/raid1.h linux-2.2.16/include/linux/raid1.h | |
15131 | --- linux.orig/include/linux/raid1.h Fri May 8 09:17:13 1998 | |
15132 | +++ linux-2.2.16/include/linux/raid1.h Thu Jan 1 01:00:00 1970 | |
15133 | @@ -1,49 +0,0 @@ | |
15134 | -#ifndef _RAID1_H | |
15135 | -#define _RAID1_H | |
15136 | - | |
15137 | -#include <linux/md.h> | |
15138 | - | |
15139 | -struct mirror_info { | |
15140 | - int number; | |
15141 | - int raid_disk; | |
15142 | - kdev_t dev; | |
15143 | - int next; | |
15144 | - int sect_limit; | |
15145 | - | |
15146 | - /* | |
15147 | - * State bits: | |
15148 | - */ | |
15149 | - int operational; | |
15150 | - int write_only; | |
15151 | - int spare; | |
15152 | -}; | |
15153 | - | |
15154 | -struct raid1_data { | |
15155 | - struct md_dev *mddev; | |
15156 | - struct mirror_info mirrors[MD_SB_DISKS]; /* RAID1 devices, 2 to MD_SB_DISKS */ | |
15157 | - int raid_disks; | |
15158 | - int working_disks; /* Number of working disks */ | |
15159 | - int last_used; | |
15160 | - unsigned long next_sect; | |
15161 | - int sect_count; | |
15162 | - int resync_running; | |
15163 | -}; | |
15164 | - | |
15165 | -/* | |
15166 | - * this is our 'private' 'collective' RAID1 buffer head. | |
15167 | - * it contains information about what kind of IO operations were started | |
15168 | - * for this RAID5 operation, and about their status: | |
15169 | - */ | |
15170 | - | |
15171 | -struct raid1_bh { | |
15172 | - unsigned int remaining; | |
15173 | - int cmd; | |
15174 | - unsigned long state; | |
15175 | - struct md_dev *mddev; | |
15176 | - struct buffer_head *master_bh; | |
15177 | - struct buffer_head *mirror_bh [MD_SB_DISKS]; | |
15178 | - struct buffer_head bh_req; | |
15179 | - struct buffer_head *next_retry; | |
15180 | -}; | |
15181 | - | |
15182 | -#endif | |
15183 | diff -ruN linux.orig/include/linux/raid5.h linux-2.2.16/include/linux/raid5.h | |
15184 | --- linux.orig/include/linux/raid5.h Fri May 8 09:17:13 1998 | |
15185 | +++ linux-2.2.16/include/linux/raid5.h Thu Jan 1 01:00:00 1970 | |
15186 | @@ -1,110 +0,0 @@ | |
15187 | -#ifndef _RAID5_H | |
15188 | -#define _RAID5_H | |
15189 | - | |
15190 | -#ifdef __KERNEL__ | |
15191 | -#include <linux/md.h> | |
15192 | -#include <asm/atomic.h> | |
15193 | - | |
15194 | -struct disk_info { | |
15195 | - kdev_t dev; | |
15196 | - int operational; | |
15197 | - int number; | |
15198 | - int raid_disk; | |
15199 | - int write_only; | |
15200 | - int spare; | |
15201 | -}; | |
15202 | - | |
15203 | -struct stripe_head { | |
15204 | - struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ | |
15205 | - struct stripe_head *free_next; /* pool of free sh's */ | |
15206 | - struct buffer_head *buffer_pool; /* pool of free buffers */ | |
15207 | - struct buffer_head *bh_pool; /* pool of free bh's */ | |
15208 | - struct raid5_data *raid_conf; | |
15209 | - struct buffer_head *bh_old[MD_SB_DISKS]; /* disk image */ | |
15210 | - struct buffer_head *bh_new[MD_SB_DISKS]; /* buffers of the MD device (present in buffer cache) */ | |
15211 | - struct buffer_head *bh_copy[MD_SB_DISKS]; /* copy on write of bh_new (bh_new can change from under us) */ | |
15212 | - struct buffer_head *bh_req[MD_SB_DISKS]; /* copy of bh_new (only the buffer heads), queued to the lower levels */ | |
15213 | - int cmd_new[MD_SB_DISKS]; /* READ/WRITE for new */ | |
15214 | - int new[MD_SB_DISKS]; /* buffer added since the last handle_stripe() */ | |
15215 | - unsigned long sector; /* sector of this row */ | |
15216 | - int size; /* buffers size */ | |
15217 | - int pd_idx; /* parity disk index */ | |
15218 | - int nr_pending; /* nr of pending cmds */ | |
15219 | - unsigned long state; /* state flags */ | |
15220 | - int cmd; /* stripe cmd */ | |
15221 | - int count; /* nr of waiters */ | |
15222 | - int write_method; /* reconstruct-write / read-modify-write */ | |
15223 | - int phase; /* PHASE_BEGIN, ..., PHASE_COMPLETE */ | |
15224 | - struct wait_queue *wait; /* processes waiting for this stripe */ | |
15225 | -}; | |
15226 | - | |
15227 | -/* | |
15228 | - * Phase | |
15229 | - */ | |
15230 | -#define PHASE_BEGIN 0 | |
15231 | -#define PHASE_READ_OLD 1 | |
15232 | -#define PHASE_WRITE 2 | |
15233 | -#define PHASE_READ 3 | |
15234 | -#define PHASE_COMPLETE 4 | |
15235 | - | |
15236 | -/* | |
15237 | - * Write method | |
15238 | - */ | |
15239 | -#define METHOD_NONE 0 | |
15240 | -#define RECONSTRUCT_WRITE 1 | |
15241 | -#define READ_MODIFY_WRITE 2 | |
15242 | - | |
15243 | -/* | |
15244 | - * Stripe state | |
15245 | - */ | |
15246 | -#define STRIPE_LOCKED 0 | |
15247 | -#define STRIPE_ERROR 1 | |
15248 | - | |
15249 | -/* | |
15250 | - * Stripe commands | |
15251 | - */ | |
15252 | -#define STRIPE_NONE 0 | |
15253 | -#define STRIPE_WRITE 1 | |
15254 | -#define STRIPE_READ 2 | |
15255 | - | |
15256 | -struct raid5_data { | |
15257 | - struct stripe_head **stripe_hashtbl; | |
15258 | - struct md_dev *mddev; | |
15259 | - struct md_thread *thread, *resync_thread; | |
15260 | - struct disk_info disks[MD_SB_DISKS]; | |
15261 | - struct disk_info *spare; | |
15262 | - int buffer_size; | |
15263 | - int chunk_size, level, algorithm; | |
15264 | - int raid_disks, working_disks, failed_disks; | |
15265 | - int sector_count; | |
15266 | - unsigned long next_sector; | |
15267 | - atomic_t nr_handle; | |
15268 | - struct stripe_head *next_free_stripe; | |
15269 | - int nr_stripes; | |
15270 | - int resync_parity; | |
15271 | - int max_nr_stripes; | |
15272 | - int clock; | |
15273 | - int nr_hashed_stripes; | |
15274 | - int nr_locked_stripes; | |
15275 | - int nr_pending_stripes; | |
15276 | - int nr_cached_stripes; | |
15277 | - | |
15278 | - /* | |
15279 | - * Free stripes pool | |
15280 | - */ | |
15281 | - int nr_free_sh; | |
15282 | - struct stripe_head *free_sh_list; | |
15283 | - struct wait_queue *wait_for_stripe; | |
15284 | -}; | |
15285 | - | |
15286 | -#endif | |
15287 | - | |
15288 | -/* | |
15289 | - * Our supported algorithms | |
15290 | - */ | |
15291 | -#define ALGORITHM_LEFT_ASYMMETRIC 0 | |
15292 | -#define ALGORITHM_RIGHT_ASYMMETRIC 1 | |
15293 | -#define ALGORITHM_LEFT_SYMMETRIC 2 | |
15294 | -#define ALGORITHM_RIGHT_SYMMETRIC 3 | |
15295 | - | |
15296 | -#endif | |
15297 | diff -ruN linux.orig/include/linux/sysctl.h linux-2.2.16/include/linux/sysctl.h | |
15298 | --- linux.orig/include/linux/sysctl.h Wed Jun 7 23:26:44 2000 | |
15299 | +++ linux-2.2.16/include/linux/sysctl.h Fri Jun 9 11:45:55 2000 | |
15300 | @@ -430,7 +430,8 @@ | |
15301 | /* CTL_DEV names: */ | |
15302 | enum { | |
15303 | DEV_CDROM=1, | |
15304 | - DEV_HWMON=2 | |
15305 | + DEV_HWMON=2, | |
15306 | + DEV_MD=3 | |
15307 | }; | |
15308 | ||
15309 | /* /proc/sys/dev/cdrom */ | |
15310 | @@ -441,6 +442,11 @@ | |
15311 | DEV_CDROM_DEBUG=4, | |
15312 | DEV_CDROM_LOCK=5, | |
15313 | DEV_CDROM_CHECK_MEDIA=6 | |
15314 | +}; | |
15315 | + | |
15316 | +/* /proc/sys/dev/md */ | |
15317 | +enum { | |
15318 | + DEV_MD_SPEED_LIMIT=1 | |
15319 | }; | |
15320 | ||
15321 | #ifdef __KERNEL__ | |
15322 | diff -ruN linux.orig/init/main.c linux-2.2.16/init/main.c | |
15323 | --- linux.orig/init/main.c Wed Jun 7 23:26:44 2000 | |
15324 | +++ linux-2.2.16/init/main.c Fri Jun 9 11:37:44 2000 | |
15325 | @@ -19,6 +19,7 @@ | |
15326 | #include <linux/utsname.h> | |
15327 | #include <linux/ioport.h> | |
15328 | #include <linux/init.h> | |
15329 | +#include <linux/raid/md.h> | |
15330 | #include <linux/smp_lock.h> | |
15331 | #include <linux/blk.h> | |
15332 | #include <linux/hdreg.h> | |
15333 | @@ -540,7 +541,7 @@ | |
15334 | #ifdef CONFIG_BLK_DEV_FD | |
15335 | { "fd", 0x0200 }, | |
15336 | #endif | |
15337 | -#ifdef CONFIG_MD_BOOT | |
15338 | +#if CONFIG_MD_BOOT || CONFIG_AUTODETECT_RAID | |
15339 | { "md", 0x0900 }, | |
15340 | #endif | |
15341 | #ifdef CONFIG_BLK_DEV_XD | |
15342 | @@ -1042,6 +1043,9 @@ | |
15343 | #ifdef CONFIG_MD_BOOT | |
15344 | { "md=", md_setup}, | |
15345 | #endif | |
15346 | +#if CONFIG_BLK_DEV_MD | |
15347 | + { "raid=", raid_setup}, | |
15348 | +#endif | |
15349 | #ifdef CONFIG_ADBMOUSE | |
15350 | { "adb_buttons=", adb_mouse_setup }, | |
15351 | #endif | |
15352 | @@ -1580,6 +1584,9 @@ | |
15353 | while (pid != wait(&i)); | |
15354 | if (MAJOR(real_root_dev) != RAMDISK_MAJOR | |
15355 | || MINOR(real_root_dev) != 0) { | |
15356 | +#ifdef CONFIG_BLK_DEV_MD | |
15357 | + autodetect_raid(); | |
15358 | +#endif | |
15359 | error = change_root(real_root_dev,"/initrd"); | |
15360 | if (error) | |
15361 | printk(KERN_ERR "Change root to /initrd: " |