1 | --- linux/init/main.c.orig Fri Nov 23 11:17:45 2001 |
2 | +++ linux/init/main.c Fri Nov 23 11:18:53 2001 | |
3 | @@ -19,6 +19,7 @@ | |
4 | #include <linux/utsname.h> | |
5 | #include <linux/ioport.h> | |
6 | #include <linux/init.h> | |
7 | +#include <linux/raid/md.h> | |
8 | #include <linux/smp_lock.h> | |
9 | #include <linux/blk.h> | |
10 | #include <linux/hdreg.h> | |
11 | @@ -1012,9 +1013,12 @@ | |
12 | #ifdef CONFIG_FTAPE | |
13 | { "ftape=", ftape_setup}, | |
14 | #endif | |
15 | -#ifdef CONFIG_MD_BOOT | |
16 | +#if defined(CONFIG_MD_BOOT) || defined(CONFIG_AUTODETECT_RAID) | |
17 | { "md=", md_setup}, | |
18 | #endif | |
19 | +#ifdef CONFIG_BLK_DEV_MD | |
20 | + { "raid=", raid_setup}, | |
21 | +#endif | |
22 | #ifdef CONFIG_ADBMOUSE | |
23 | { "adb_buttons=", adb_mouse_setup }, | |
24 | #endif | |
25 | @@ -1592,6 +1596,9 @@ | |
26 | while (pid != wait(&i)); | |
27 | if (MAJOR(real_root_dev) != RAMDISK_MAJOR | |
28 | || MINOR(real_root_dev) != 0) { | |
29 | +#ifdef CONFIG_BLK_DEV_MD | |
30 | + autodetect_raid(); | |
31 | +#endif | |
32 | error = change_root(real_root_dev,"/initrd"); | |
33 | if (error) | |
34 | printk(KERN_ERR "Change root to /initrd: " | |
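Handlers registered in this boot-setup table receive the text that follows the '=' plus an array of any comma-separated integers already parsed out (ints[0] holds their count). A minimal sketch of a raid= handler, for illustration only: the handler body and the noautodetect flag are assumptions, not part of this patch; only the raid_setup() declaration in md.h below is.

/* sketch only: hypothetical raid= boot-option handler */
static int raid_noautodetect;	/* assumed flag, illustration only */

void raid_setup(char *str, int *ints)
{
	if (str && !strncmp(str, "noautodetect", 12))
		raid_noautodetect = 1;	/* would make the later autodetect_raid() call a no-op */
}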
35 | --- linux/include/linux/raid/hsm.h.orig Fri Nov 23 11:18:15 2001 | |
36 | +++ linux/include/linux/raid/hsm.h Fri Nov 23 11:18:15 2001 | |
37 | @@ -0,0 +1,65 @@ | |
38 | +#ifndef _HSM_H | |
39 | +#define _HSM_H | |
40 | + | |
41 | +#include <linux/raid/md.h> | |
42 | + | |
43 | +#if __alpha__ | |
44 | +#error fix cpu_addr on Alpha first | |
45 | +#endif | |
46 | + | |
47 | +#include <linux/raid/hsm_p.h> | |
48 | + | |
49 | +#define index_pv(lv,index) ((lv)->vg->pv_array+(index)->data.phys_nr) | |
50 | +#define index_dev(lv,index) index_pv((lv),(index))->dev | |
51 | +#define index_block(lv,index) (index)->data.phys_block | |
52 | +#define index_child(index) ((lv_lptr_t *)((index)->cpu_addr)) | |
53 | + | |
54 | +#define ptr_to_cpuaddr(ptr) ((__u32) (ptr)) | |
55 | + | |
56 | + | |
57 | +typedef struct pv_bg_desc_s { | |
58 | + unsigned int free_blocks; | |
59 | + pv_block_group_t *bg; | |
60 | +} pv_bg_desc_t; | |
61 | + | |
62 | +typedef struct pv_s pv_t; | |
63 | +typedef struct vg_s vg_t; | |
64 | +typedef struct lv_s lv_t; | |
65 | + | |
66 | +struct pv_s | |
67 | +{ | |
68 | + int phys_nr; | |
69 | + kdev_t dev; | |
70 | + pv_sb_t *pv_sb; | |
71 | + pv_bg_desc_t *bg_array; | |
72 | +}; | |
73 | + | |
74 | +struct lv_s | |
75 | +{ | |
76 | + int log_id; | |
77 | + vg_t *vg; | |
78 | + | |
79 | + unsigned int max_indices; | |
80 | + unsigned int free_indices; | |
81 | + lv_lptr_t root_index; | |
82 | + | |
83 | + kdev_t dev; | |
84 | +}; | |
85 | + | |
86 | +struct vg_s | |
87 | +{ | |
88 | + int nr_pv; | |
89 | + pv_t pv_array [MD_SB_DISKS]; | |
90 | + | |
91 | + int nr_lv; | |
92 | + lv_t lv_array [HSM_MAX_LVS_PER_VG]; | |
93 | + | |
94 | + vg_sb_t *vg_sb; | |
95 | + mddev_t *mddev; | |
96 | +}; | |
97 | + | |
98 | +#define kdev_to_lv(dev) ((lv_t *) mddev_map[MINOR(dev)].data) | |
99 | +#define mddev_to_vg(mddev) ((vg_t *) mddev->private) | |
100 | + | |
101 | +#endif | |
102 | + | |
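The index_* macros above are the whole address-translation story for HSM: an lv_lptr_t names a (PV number, physical block) pair and the macros chase it to the backing device. A minimal sketch of how they compose (hsm_resolve is a made-up helper name, not part of the patch):

/* sketch only: resolve one logical-volume index entry to (device, block) */
static void hsm_resolve(lv_t *lv, lv_lptr_t *index, kdev_t *dev, unsigned int *block)
{
	pv_t *pv = index_pv(lv, index);		/* lv->vg->pv_array + index->data.phys_nr */

	*dev   = pv->dev;			/* same thing index_dev(lv, index) yields */
	*block = index_block(lv, index);	/* physical block within that PV */
}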
103 | --- linux/include/linux/raid/hsm_p.h.orig Fri Nov 23 11:18:15 2001 | |
104 | +++ linux/include/linux/raid/hsm_p.h Fri Nov 23 11:18:15 2001 | |
105 | @@ -0,0 +1,237 @@ | |
106 | +#ifndef _HSM_P_H | |
107 | +#define _HSM_P_H | |
108 | + | |
109 | +#define HSM_BLOCKSIZE 4096 | |
110 | +#define HSM_BLOCKSIZE_WORDS (HSM_BLOCKSIZE/4) | |
111 | +#define PACKED __attribute__ ((packed)) | |
112 | + | |
113 | +/* | |
114 | + * Identifies a block in physical space | |
115 | + */ | |
116 | +typedef struct phys_idx_s { | |
117 | + __u16 phys_nr; | |
118 | + __u32 phys_block; | |
119 | + | |
120 | +} PACKED phys_idx_t; | |
121 | + | |
122 | +/* | |
123 | + * Identifies a block in logical space | |
124 | + */ | |
125 | +typedef struct log_idx_s { | |
126 | + __u16 log_id; | |
127 | + __u32 log_index; | |
128 | + | |
129 | +} PACKED log_idx_t; | |
130 | + | |
131 | +/* | |
132 | + * Describes one PV | |
133 | + */ | |
134 | +#define HSM_PV_SB_MAGIC 0xf091ae9fU | |
135 | + | |
136 | +#define HSM_PV_SB_GENERIC_WORDS 32 | |
137 | +#define HSM_PV_SB_RESERVED_WORDS \ | |
138 | + (HSM_BLOCKSIZE_WORDS - HSM_PV_SB_GENERIC_WORDS) | |
139 | + | |
140 | +/* | |
141 | + * On-disk PV identification data, on block 0 in any PV. | |
142 | + */ | |
143 | +typedef struct pv_sb_s | |
144 | +{ | |
145 | + __u32 pv_magic; /* 0 */ | |
146 | + | |
147 | + __u32 pv_uuid0; /* 1 */ | |
148 | + __u32 pv_uuid1; /* 2 */ | |
149 | + __u32 pv_uuid2; /* 3 */ | |
150 | + __u32 pv_uuid3; /* 4 */ | |
151 | + | |
152 | + __u32 pv_major; /* 5 */ | |
153 | + __u32 pv_minor; /* 6 */ | |
154 | + __u32 pv_patch; /* 7 */ | |
155 | + | |
156 | + __u32 pv_ctime; /* 8 Creation time */ | |
157 | + | |
158 | + __u32 pv_total_size; /* 9 size of this PV, in blocks */ | |
159 | + __u32 pv_first_free; /* 10 first free block */ | |
160 | + __u32 pv_first_used; /* 11 first used block */ | |
161 | + __u32 pv_blocks_left; /* 12 unallocated blocks */ | |
162 | + __u32 pv_bg_size; /* 13 size of a block group, in blocks */ | |
163 | + __u32 pv_block_size; /* 14 size of blocks, in bytes */ | |
164 | + __u32 pv_pptr_size; /* 15 size of block descriptor, in bytes */ | |
165 | + __u32 pv_block_groups; /* 16 number of block groups */ | |
166 | + | |
167 | + __u32 __reserved1[HSM_PV_SB_GENERIC_WORDS - 17]; | |
168 | + | |
169 | + /* | |
170 | + * Reserved | |
171 | + */ | |
172 | + __u32 __reserved2[HSM_PV_SB_RESERVED_WORDS]; | |
173 | + | |
174 | +} PACKED pv_sb_t; | |
175 | + | |
176 | +/* | |
177 | + * this is pretty much arbitrary, but has to be less than ~64 | |
178 | + */ | |
179 | +#define HSM_MAX_LVS_PER_VG 32 | |
180 | + | |
181 | +#define HSM_VG_SB_GENERIC_WORDS 32 | |
182 | + | |
183 | +#define LV_DESCRIPTOR_WORDS 8 | |
184 | +#define HSM_VG_SB_RESERVED_WORDS (HSM_BLOCKSIZE_WORDS - \ | |
185 | + LV_DESCRIPTOR_WORDS*HSM_MAX_LVS_PER_VG - HSM_VG_SB_GENERIC_WORDS) | |
186 | + | |
187 | +#if (HSM_VG_SB_RESERVED_WORDS < 0) | |
188 | +#error you messed this one up dude ... | |
189 | +#endif | |
190 | + | |
191 | +typedef struct lv_descriptor_s | |
192 | +{ | |
193 | + __u32 lv_id; /* 0 */ | |
194 | + phys_idx_t lv_root_idx; /* 1 */ | |
195 | + __u16 __reserved; /* 2 */ | |
196 | + __u32 lv_max_indices; /* 3 */ | |
197 | + __u32 lv_free_indices; /* 4 */ | |
198 | + __u32 md_id; /* 5 */ | |
199 | + | |
200 | + __u32 reserved[LV_DESCRIPTOR_WORDS - 6]; | |
201 | + | |
202 | +} PACKED lv_descriptor_t; | |
203 | + | |
204 | +#define HSM_VG_SB_MAGIC 0x98320d7aU | |
205 | +/* | |
206 | + * On-disk VG identification data, in block 1 on all PVs | |
207 | + */ | |
208 | +typedef struct vg_sb_s | |
209 | +{ | |
210 | + __u32 vg_magic; /* 0 */ | |
211 | + __u32 nr_lvs; /* 1 */ | |
212 | + | |
213 | + __u32 __reserved1[HSM_VG_SB_GENERIC_WORDS - 2]; | |
214 | + | |
215 | + lv_descriptor_t lv_array [HSM_MAX_LVS_PER_VG]; | |
216 | + /* | |
217 | + * Reserved | |
218 | + */ | |
219 | + __u32 __reserved2[HSM_VG_SB_RESERVED_WORDS]; | |
220 | + | |
221 | +} PACKED vg_sb_t; | |
222 | + | |
223 | +/* | |
224 | + * Describes one LV | |
225 | + */ | |
226 | + | |
227 | +#define HSM_LV_SB_MAGIC 0xe182bd8aU | |
228 | + | |
229 | +/* do we need lv_sb_t? */ | |
230 | + | |
231 | +typedef struct lv_sb_s | |
232 | +{ | |
233 | + /* | |
234 | + * On-disk LV identifier | |
235 | + */ | |
236 | + __u32 lv_magic; /* 0 LV identifier */ | |
237 | + __u32 lv_uuid0; /* 1 */ | |
238 | + __u32 lv_uuid1; /* 2 */ | |
239 | + __u32 lv_uuid2; /* 3 */ | |
240 | + __u32 lv_uuid3; /* 4 */ | |
241 | + | |
242 | + __u32 lv_major; /* 5 PV identifier */ | |
243 | + __u32 lv_minor; /* 6 PV identifier */ | |
244 | + __u32 lv_patch; /* 7 PV identifier */ | |
245 | + | |
246 | + __u32 ctime; /* 8 Creation time */ | |
247 | + __u32 size; /* 9 size of this LV, in blocks */ | |
248 | + phys_idx_t start; /* 10 position of root index block */ | |
249 | + log_idx_t first_free; /* 11-12 first free index */ | |
250 | + | |
251 | + /* | |
252 | + * Reserved | |
253 | + */ | |
254 | + __u32 reserved[HSM_BLOCKSIZE_WORDS-13]; | |
255 | + | |
256 | +} PACKED lv_sb_t; | |
257 | + | |
258 | +/* | |
259 | + * Pointer pointing from the physical space, points to | |
260 | + * the LV owning this block. It also contains various | |
261 | + * statistics about the physical block. | |
262 | + */ | |
263 | +typedef struct pv_pptr_s | |
264 | +{ | |
265 | + union { | |
266 | + /* case 1 */ | |
267 | + struct { | |
268 | + log_idx_t owner; | |
269 | + log_idx_t predicted; | |
270 | + __u32 last_referenced; | |
271 | + } used; | |
272 | + /* case 2 */ | |
273 | + struct { | |
274 | + __u16 log_id; | |
275 | + __u16 __unused1; | |
276 | + __u32 next_free; | |
277 | + __u32 __unused2; | |
278 | + __u32 __unused3; | |
279 | + } free; | |
280 | + } u; | |
281 | +} PACKED pv_pptr_t; | |
282 | + | |
283 | +static __inline__ int pv_pptr_free (const pv_pptr_t * pptr) | |
284 | +{ | |
285 | + return !pptr->u.free.log_id; | |
286 | +} | |
287 | + | |
288 | + | |
289 | +#define DATA_BLOCKS_PER_BG ((HSM_BLOCKSIZE*8)/(8*sizeof(pv_pptr_t)+1)) | |
290 | + | |
291 | +#define TOTAL_BLOCKS_PER_BG (DATA_BLOCKS_PER_BG+1) | |
292 | +/* | |
293 | + * A table of pointers filling up a single block, managing | |
294 | + * the next DATA_BLOCKS_PER_BG physical blocks. Such block | |
295 | + * groups form the physical space of blocks. | |
296 | + */ | |
297 | +typedef struct pv_block_group_s | |
298 | +{ | |
299 | + __u8 used_bitmap[(DATA_BLOCKS_PER_BG+7)/8]; | |
300 | + | |
301 | + pv_pptr_t blocks[DATA_BLOCKS_PER_BG]; | |
302 | + | |
303 | +} PACKED pv_block_group_t; | |
304 | + | |
305 | +/* | |
306 | + * Pointer from the logical space, points to | |
307 | + * the (PV,block) containing this logical block | |
308 | + */ | |
309 | +typedef struct lv_lptr_s | |
310 | +{ | |
311 | + phys_idx_t data; | |
312 | + __u16 __reserved; | |
313 | + __u32 cpu_addr; | |
314 | + __u32 __reserved2; | |
315 | + | |
316 | +} PACKED lv_lptr_t; | |
317 | + | |
318 | +static __inline__ int index_free (const lv_lptr_t * index) | |
319 | +{ | |
320 | + return !index->data.phys_block; | |
321 | +} | |
322 | + | |
323 | +static __inline__ int index_present (const lv_lptr_t * index) | |
324 | +{ | |
325 | + return index->cpu_addr; | |
326 | +} | |
327 | + | |
328 | + | |
329 | +#define HSM_LPTRS_PER_BLOCK (HSM_BLOCKSIZE/sizeof(lv_lptr_t)) | |
330 | +/* | |
331 | + * A table of pointers filling up a single block, managing | |
332 | + * HSM_LPTRS_PER_BLOCK logical blocks. Such block groups form | |
333 | + * the logical space of blocks. | |
334 | + */ | |
335 | +typedef struct lv_index_block_s | |
336 | +{ | |
337 | + lv_lptr_t blocks[HSM_LPTRS_PER_BLOCK]; | |
338 | + | |
339 | +} PACKED lv_index_block_t; | |
340 | + | |
341 | +#endif | |
342 | + | |
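With the structures above fully packed, sizeof(pv_pptr_t) and sizeof(lv_lptr_t) both come to 16 bytes, so the block-group geometry works out exactly. A small userspace check of that arithmetic (it assumes 2-byte __u16 and 4-byte __u32):

#include <stdio.h>

int main(void)
{
	int data_per_bg  = (4096 * 8) / (8 * 16 + 1);	/* DATA_BLOCKS_PER_BG  = 254 */
	int bitmap_bytes = (data_per_bg + 7) / 8;	/* used_bitmap[]       =  32 */
	int lptrs        = 4096 / 16;			/* HSM_LPTRS_PER_BLOCK = 256 */

	/* one pv_block_group_t fills its 4096-byte block exactly: 32 + 254*16 = 4096 */
	printf("%d data blocks per bg, %d bitmap bytes, %d bytes total, %d lptrs per block\n",
	       data_per_bg, bitmap_bytes, bitmap_bytes + data_per_bg * 16, lptrs);
	return 0;
}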
343 | --- linux/include/linux/raid/linear.h.orig Fri Nov 23 11:18:15 2001 | |
344 | +++ linux/include/linux/raid/linear.h Fri Nov 23 11:18:15 2001 | |
345 | @@ -0,0 +1,32 @@ | |
346 | +#ifndef _LINEAR_H | |
347 | +#define _LINEAR_H | |
348 | + | |
349 | +#include <linux/raid/md.h> | |
350 | + | |
351 | +struct dev_info { | |
352 | + kdev_t dev; | |
353 | + int size; | |
354 | + unsigned int offset; | |
355 | +}; | |
356 | + | |
357 | +typedef struct dev_info dev_info_t; | |
358 | + | |
359 | +struct linear_hash | |
360 | +{ | |
361 | + dev_info_t *dev0, *dev1; | |
362 | +}; | |
363 | + | |
364 | +struct linear_private_data | |
365 | +{ | |
366 | + struct linear_hash *hash_table; | |
367 | + dev_info_t disks[MD_SB_DISKS]; | |
368 | + dev_info_t *smallest; | |
369 | + int nr_zones; | |
370 | +}; | |
371 | + | |
372 | + | |
373 | +typedef struct linear_private_data linear_conf_t; | |
374 | + | |
375 | +#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private) | |
376 | + | |
377 | +#endif | |
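Each entry of disks[] above covers the range [offset, offset + size) of the concatenated device, so a lookup (ignoring the two-entry hash acceleration) is a plain scan. A sketch; linear_find is a made-up helper and the member count is passed in because linear_conf_t does not store it:

/* sketch only: which member disk holds a given block of the linear array */
static dev_info_t *linear_find(linear_conf_t *conf, int nr_disks, unsigned long block)
{
	int i;

	for (i = 0; i < nr_disks; i++) {
		dev_info_t *d = conf->disks + i;

		if (block >= d->offset && block < d->offset + (unsigned long) d->size)
			return d;	/* the block sits (block - d->offset) into d->dev */
	}
	return NULL;
}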
378 | --- linux/include/linux/raid/md.h.orig Fri Nov 23 11:18:15 2001 | |
379 | +++ linux/include/linux/raid/md.h Fri Nov 23 11:18:15 2001 | |
380 | @@ -0,0 +1,96 @@ | |
381 | +/* | |
382 | + md.h : Multiple Devices driver for Linux | |
383 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
384 | + Copyright (C) 1994-96 Marc ZYNGIER | |
385 | + <zyngier@ufr-info-p7.ibp.fr> or | |
386 | + <maz@gloups.fdn.fr> | |
387 | + | |
388 | + This program is free software; you can redistribute it and/or modify | |
389 | + it under the terms of the GNU General Public License as published by | |
390 | + the Free Software Foundation; either version 2, or (at your option) | |
391 | + any later version. | |
392 | + | |
393 | + You should have received a copy of the GNU General Public License | |
394 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
395 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
396 | +*/ | |
397 | + | |
398 | +#ifndef _MD_H | |
399 | +#define _MD_H | |
400 | + | |
401 | +#include <linux/mm.h> | |
402 | +#include <linux/fs.h> | |
403 | +#include <linux/blkdev.h> | |
404 | +#include <asm/semaphore.h> | |
405 | +#include <linux/major.h> | |
406 | +#include <linux/ioctl.h> | |
407 | +#include <linux/types.h> | |
408 | +#include <asm/bitops.h> | |
409 | +#include <linux/module.h> | |
410 | +#include <linux/mm.h> | |
411 | +#include <linux/hdreg.h> | |
412 | +#include <linux/sysctl.h> | |
413 | +#include <linux/fs.h> | |
414 | +#include <linux/proc_fs.h> | |
415 | +#include <linux/smp_lock.h> | |
416 | +#include <linux/delay.h> | |
417 | +#include <net/checksum.h> | |
418 | +#include <linux/random.h> | |
419 | +#include <linux/locks.h> | |
420 | +#include <asm/io.h> | |
421 | + | |
422 | +#include <linux/raid/md_compatible.h> | |
423 | +/* | |
424 | + * 'md_p.h' holds the 'physical' layout of RAID devices | |
425 | + * 'md_u.h' holds the user <=> kernel API | |
426 | + * | |
427 | + * 'md_k.h' holds kernel internal definitions | |
428 | + */ | |
429 | + | |
430 | +#include <linux/raid/md_p.h> | |
431 | +#include <linux/raid/md_u.h> | |
432 | +#include <linux/raid/md_k.h> | |
433 | + | |
434 | +/* | |
435 | + * Different major versions are not compatible. | |
436 | + * Different minor versions are only downward compatible. | |
437 | + * Different patchlevel versions are downward and upward compatible. | |
438 | + */ | |
439 | +#define MD_MAJOR_VERSION 0 | |
440 | +#define MD_MINOR_VERSION 90 | |
441 | +#define MD_PATCHLEVEL_VERSION 0 | |
442 | + | |
443 | +extern int md_size[MAX_MD_DEVS]; | |
444 | +extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; | |
445 | + | |
446 | +extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data); | |
447 | +extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev); | |
448 | +extern char * partition_name (kdev_t dev); | |
449 | +extern int register_md_personality (int p_num, mdk_personality_t *p); | |
450 | +extern int unregister_md_personality (int p_num); | |
451 | +extern mdk_thread_t * md_register_thread (void (*run) (void *data), | |
452 | + void *data, const char *name); | |
453 | +extern void md_unregister_thread (mdk_thread_t *thread); | |
454 | +extern void md_wakeup_thread(mdk_thread_t *thread); | |
455 | +extern void md_interrupt_thread (mdk_thread_t *thread); | |
456 | +extern int md_update_sb (mddev_t *mddev); | |
457 | +extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare); | |
458 | +extern void md_recover_arrays (void); | |
459 | +extern int md_check_ordering (mddev_t *mddev); | |
460 | +extern void autodetect_raid(void); | |
461 | +extern struct gendisk * find_gendisk (kdev_t dev); | |
462 | +extern int md_notify_reboot(struct notifier_block *this, | |
463 | + unsigned long code, void *x); | |
464 | +#ifdef CONFIG_BLK_DEV_MD | |
465 | +extern void raid_setup(char *str,int *ints) md__init; | |
466 | +#endif | |
467 | +#ifdef CONFIG_MD_BOOT | |
468 | +extern void md_setup(char *str,int *ints) md__init; | |
469 | +#endif | |
470 | + | |
471 | +extern void md_print_devices (void); | |
472 | + | |
473 | +#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | |
474 | + | |
475 | +#endif /* _MD_H */ | |
476 | + | |
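register_md_personality()/unregister_md_personality() above are how the individual personalities (linear, raid0, raid1, raid5, ...) attach themselves to the core. A minimal sketch of that registration boilerplate; EXAMPLE and example_personality are made-up names, and the mdk_personality_t fields are the ones declared in md_k.h below:

/* sketch only: skeleton of a personality registering itself with md */
static mdk_personality_t example_personality = {
	name:		"example",
	/* map, make_request, run, stop, status, ... filled in by the real driver */
};

int example_init(void)
{
	/* EXAMPLE would be a personality number below MAX_PERSONALITY */
	return register_md_personality(EXAMPLE, &example_personality);
}

void example_cleanup(void)
{
	unregister_md_personality(EXAMPLE);
}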
477 | --- linux/include/linux/raid/md_compatible.h.orig Fri Nov 23 11:18:16 2001 | |
478 | +++ linux/include/linux/raid/md_compatible.h Fri Nov 23 11:18:16 2001 | |
479 | @@ -0,0 +1,387 @@ | |
480 | + | |
481 | +/* | |
482 | + md.h : Multiple Devices driver compatibility layer for Linux 2.0/2.2 | |
483 | + Copyright (C) 1998 Ingo Molnar | |
484 | + | |
485 | + This program is free software; you can redistribute it and/or modify | |
486 | + it under the terms of the GNU General Public License as published by | |
487 | + the Free Software Foundation; either version 2, or (at your option) | |
488 | + any later version. | |
489 | + | |
490 | + You should have received a copy of the GNU General Public License | |
491 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
492 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
493 | +*/ | |
494 | + | |
495 | +#include <linux/version.h> | |
496 | + | |
497 | +#ifndef _MD_COMPATIBLE_H | |
498 | +#define _MD_COMPATIBLE_H | |
499 | + | |
500 | +#define LinuxVersionCode(v, p, s) (((v)<<16)+((p)<<8)+(s)) | |
501 | + | |
502 | +#if LINUX_VERSION_CODE < LinuxVersionCode(2,1,0) | |
503 | + | |
504 | +/* 000 */ | |
505 | +#define md__get_free_pages(x,y) __get_free_pages(x,y,GFP_KERNEL) | |
506 | + | |
507 | +#ifdef __i386__ | |
508 | +/* 001 */ | |
509 | +extern __inline__ int md_cpu_has_mmx(void) | |
510 | +{ | |
511 | + return x86_capability & 0x00800000; | |
512 | +} | |
513 | +#endif | |
514 | + | |
515 | +/* 002 */ | |
516 | +#define md_clear_page(page) memset((void *)(page), 0, PAGE_SIZE) | |
517 | + | |
518 | +/* 003 */ | |
519 | +/* | |
520 | + * someone please suggest a sane compatibility layer for modules | |
521 | + */ | |
522 | +#define MD_EXPORT_SYMBOL(x) | |
523 | + | |
524 | +/* 004 */ | |
525 | +static inline unsigned long | |
526 | +md_copy_from_user(void *to, const void *from, unsigned long n) | |
527 | +{ | |
528 | + int err; | |
529 | + | |
530 | + err = verify_area(VERIFY_READ,from,n); | |
531 | + if (!err) | |
532 | + memcpy_fromfs(to, from, n); | |
533 | + return err; | |
534 | +} | |
535 | + | |
536 | +/* 005 */ | |
537 | +extern inline unsigned long | |
538 | +md_copy_to_user(void *to, const void *from, unsigned long n) | |
539 | +{ | |
540 | + int err; | |
541 | + | |
542 | + err = verify_area(VERIFY_WRITE,to,n); | |
543 | + if (!err) | |
544 | + memcpy_tofs(to, from, n); | |
545 | + return err; | |
546 | +} | |
547 | + | |
548 | +/* 006 */ | |
549 | +#define md_put_user(x,ptr) \ | |
550 | +({ \ | |
551 | + int __err; \ | |
552 | + \ | |
553 | + __err = verify_area(VERIFY_WRITE,ptr,sizeof(*ptr)); \ | |
554 | + if (!__err) \ | |
555 | + put_user(x,ptr); \ | |
556 | + __err; \ | |
557 | +}) | |
558 | + | |
559 | +/* 007 */ | |
560 | +extern inline int md_capable_admin(void) | |
561 | +{ | |
562 | + return suser(); | |
563 | +} | |
564 | + | |
565 | +/* 008 */ | |
566 | +#define MD_FILE_TO_INODE(file) ((file)->f_inode) | |
567 | + | |
568 | +/* 009 */ | |
569 | +extern inline void md_flush_signals (void) | |
570 | +{ | |
571 | + current->signal = 0; | |
572 | +} | |
573 | + | |
574 | +/* 010 */ | |
575 | +#define __S(nr) (1<<((nr)-1)) | |
576 | +extern inline void md_init_signals (void) | |
577 | +{ | |
578 | + current->exit_signal = SIGCHLD; | |
579 | + current->blocked = ~(__S(SIGKILL)); | |
580 | +} | |
581 | +#undef __S | |
582 | + | |
583 | +/* 011 */ | |
584 | +extern inline unsigned long md_signal_pending (struct task_struct * tsk) | |
585 | +{ | |
586 | + return (tsk->signal & ~tsk->blocked); | |
587 | +} | |
588 | + | |
589 | +/* 012 */ | |
590 | +#define md_set_global_readahead(x) read_ahead[MD_MAJOR] = MD_READAHEAD | |
591 | + | |
592 | +/* 013 */ | |
593 | +#define md_mdelay(n) (\ | |
594 | + {unsigned long msec=(n); while (msec--) udelay(1000);}) | |
595 | + | |
596 | +/* 014 */ | |
597 | +#define MD_SYS_DOWN 0 | |
598 | +#define MD_SYS_HALT 0 | |
599 | +#define MD_SYS_POWER_OFF 0 | |
600 | + | |
601 | +/* 015 */ | |
602 | +#define md_register_reboot_notifier(x) | |
603 | + | |
604 | +/* 016 */ | |
605 | +extern __inline__ unsigned long | |
606 | +md_test_and_set_bit(int nr, void * addr) | |
607 | +{ | |
608 | + unsigned long flags; | |
609 | + unsigned long oldbit; | |
610 | + | |
611 | + save_flags(flags); | |
612 | + cli(); | |
613 | + oldbit = test_bit(nr,addr); | |
614 | + set_bit(nr,addr); | |
615 | + restore_flags(flags); | |
616 | + return oldbit; | |
617 | +} | |
618 | + | |
619 | +/* 017 */ | |
620 | +extern __inline__ unsigned long | |
621 | +md_test_and_clear_bit(int nr, void * addr) | |
622 | +{ | |
623 | + unsigned long flags; | |
624 | + unsigned long oldbit; | |
625 | + | |
626 | + save_flags(flags); | |
627 | + cli(); | |
628 | + oldbit = test_bit(nr,addr); | |
629 | + clear_bit(nr,addr); | |
630 | + restore_flags(flags); | |
631 | + return oldbit; | |
632 | +} | |
633 | + | |
634 | +/* 018 */ | |
635 | +#define md_atomic_read(x) (*(volatile int *)(x)) | |
636 | +#define md_atomic_set(x,y) (*(volatile int *)(x) = (y)) | |
637 | + | |
638 | +/* 019 */ | |
639 | +extern __inline__ void md_lock_kernel (void) | |
640 | +{ | |
641 | +#if __SMP__ | |
642 | + lock_kernel(); | |
643 | + syscall_count++; | |
644 | +#endif | |
645 | +} | |
646 | + | |
647 | +extern __inline__ void md_unlock_kernel (void) | |
648 | +{ | |
649 | +#if __SMP__ | |
650 | + syscall_count--; | |
651 | + unlock_kernel(); | |
652 | +#endif | |
653 | +} | |
654 | +/* 020 */ | |
655 | + | |
656 | +#define md__init | |
657 | +#define md__initdata | |
658 | +#define md__initfunc(__arginit) __arginit | |
659 | + | |
660 | +/* 021 */ | |
661 | + | |
662 | +/* 022 */ | |
663 | + | |
664 | +struct md_list_head { | |
665 | + struct md_list_head *next, *prev; | |
666 | +}; | |
667 | + | |
668 | +#define MD_LIST_HEAD(name) \ | |
669 | + struct md_list_head name = { &name, &name } | |
670 | + | |
671 | +#define MD_INIT_LIST_HEAD(ptr) do { \ | |
672 | + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ | |
673 | +} while (0) | |
674 | + | |
675 | +static __inline__ void md__list_add(struct md_list_head * new, | |
676 | + struct md_list_head * prev, | |
677 | + struct md_list_head * next) | |
678 | +{ | |
679 | + next->prev = new; | |
680 | + new->next = next; | |
681 | + new->prev = prev; | |
682 | + prev->next = new; | |
683 | +} | |
684 | + | |
685 | +static __inline__ void md_list_add(struct md_list_head *new, | |
686 | + struct md_list_head *head) | |
687 | +{ | |
688 | + md__list_add(new, head, head->next); | |
689 | +} | |
690 | + | |
691 | +static __inline__ void md__list_del(struct md_list_head * prev, | |
692 | + struct md_list_head * next) | |
693 | +{ | |
694 | + next->prev = prev; | |
695 | + prev->next = next; | |
696 | +} | |
697 | + | |
698 | +static __inline__ void md_list_del(struct md_list_head *entry) | |
699 | +{ | |
700 | + md__list_del(entry->prev, entry->next); | |
701 | +} | |
702 | + | |
703 | +static __inline__ int md_list_empty(struct md_list_head *head) | |
704 | +{ | |
705 | + return head->next == head; | |
706 | +} | |
707 | + | |
708 | +#define md_list_entry(ptr, type, member) \ | |
709 | + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) | |
710 | + | |
711 | +/* 023 */ | |
712 | + | |
713 | +static __inline__ signed long md_schedule_timeout(signed long timeout) | |
714 | +{ | |
715 | + current->timeout = jiffies + timeout; | |
716 | + schedule(); | |
717 | + return 0; | |
718 | +} | |
719 | + | |
720 | +/* 024 */ | |
721 | +#define md_need_resched(tsk) (need_resched) | |
722 | + | |
723 | +/* 025 */ | |
724 | +typedef struct { int gcc_is_buggy; } md_spinlock_t; | |
725 | +#define MD_SPIN_LOCK_UNLOCKED (md_spinlock_t) { 0 } | |
726 | + | |
727 | +#define md_spin_lock_irq cli | |
728 | +#define md_spin_unlock_irq sti | |
729 | +#define md_spin_unlock_irqrestore(x,flags) restore_flags(flags) | |
730 | +#define md_spin_lock_irqsave(x,flags) do { save_flags(flags); cli(); } while (0) | |
731 | + | |
732 | +/* END */ | |
733 | + | |
734 | +#else | |
735 | + | |
736 | +#include <linux/reboot.h> | |
737 | +#include <linux/vmalloc.h> | |
738 | + | |
739 | +/* 000 */ | |
740 | +#define md__get_free_pages(x,y) __get_free_pages(x,y) | |
741 | + | |
742 | +#ifdef __i386__ | |
743 | +/* 001 */ | |
744 | +extern __inline__ int md_cpu_has_mmx(void) | |
745 | +{ | |
746 | + return boot_cpu_data.x86_capability & X86_FEATURE_MMX; | |
747 | +} | |
748 | +#endif | |
749 | + | |
750 | +/* 002 */ | |
751 | +#define md_clear_page(page) clear_page(page) | |
752 | + | |
753 | +/* 003 */ | |
754 | +#define MD_EXPORT_SYMBOL(x) EXPORT_SYMBOL(x) | |
755 | + | |
756 | +/* 004 */ | |
757 | +#define md_copy_to_user(x,y,z) copy_to_user(x,y,z) | |
758 | + | |
759 | +/* 005 */ | |
760 | +#define md_copy_from_user(x,y,z) copy_from_user(x,y,z) | |
761 | + | |
762 | +/* 006 */ | |
763 | +#define md_put_user put_user | |
764 | + | |
765 | +/* 007 */ | |
766 | +extern inline int md_capable_admin(void) | |
767 | +{ | |
768 | + return capable(CAP_SYS_ADMIN); | |
769 | +} | |
770 | + | |
771 | +/* 008 */ | |
772 | +#define MD_FILE_TO_INODE(file) ((file)->f_dentry->d_inode) | |
773 | + | |
774 | +/* 009 */ | |
775 | +extern inline void md_flush_signals (void) | |
776 | +{ | |
777 | + spin_lock(¤t->sigmask_lock); | |
778 | + flush_signals(current); | |
779 | + spin_unlock(¤t->sigmask_lock); | |
780 | +} | |
781 | + | |
782 | +/* 010 */ | |
783 | +extern inline void md_init_signals (void) | |
784 | +{ | |
785 | + current->exit_signal = SIGCHLD; | |
786 | + siginitsetinv(¤t->blocked, sigmask(SIGKILL)); | |
787 | +} | |
788 | + | |
789 | +/* 011 */ | |
790 | +#define md_signal_pending signal_pending | |
791 | + | |
792 | +/* 012 */ | |
793 | +extern inline void md_set_global_readahead(int * table) | |
794 | +{ | |
795 | + max_readahead[MD_MAJOR] = table; | |
796 | +} | |
797 | + | |
798 | +/* 013 */ | |
799 | +#define md_mdelay(x) mdelay(x) | |
800 | + | |
801 | +/* 014 */ | |
802 | +#define MD_SYS_DOWN SYS_DOWN | |
803 | +#define MD_SYS_HALT SYS_HALT | |
804 | +#define MD_SYS_POWER_OFF SYS_POWER_OFF | |
805 | + | |
806 | +/* 015 */ | |
807 | +#define md_register_reboot_notifier register_reboot_notifier | |
808 | + | |
809 | +/* 016 */ | |
810 | +#define md_test_and_set_bit test_and_set_bit | |
811 | + | |
812 | +/* 017 */ | |
813 | +#define md_test_and_clear_bit test_and_clear_bit | |
814 | + | |
815 | +/* 018 */ | |
816 | +#define md_atomic_read atomic_read | |
817 | +#define md_atomic_set atomic_set | |
818 | + | |
819 | +/* 019 */ | |
820 | +#define md_lock_kernel lock_kernel | |
821 | +#define md_unlock_kernel unlock_kernel | |
822 | + | |
823 | +/* 020 */ | |
824 | + | |
825 | +#include <linux/init.h> | |
826 | + | |
827 | +#define md__init __init | |
828 | +#define md__initdata __initdata | |
829 | +#define md__initfunc(__arginit) __initfunc(__arginit) | |
830 | + | |
831 | +/* 021 */ | |
832 | + | |
833 | + | |
834 | +/* 022 */ | |
835 | + | |
836 | +#define md_list_head list_head | |
837 | +#define MD_LIST_HEAD(name) LIST_HEAD(name) | |
838 | +#define MD_INIT_LIST_HEAD(ptr) INIT_LIST_HEAD(ptr) | |
839 | +#define md_list_add list_add | |
840 | +#define md_list_del list_del | |
841 | +#define md_list_empty list_empty | |
842 | + | |
843 | +#define md_list_entry(ptr, type, member) list_entry(ptr, type, member) | |
844 | + | |
845 | +/* 023 */ | |
846 | + | |
847 | +#define md_schedule_timeout schedule_timeout | |
848 | + | |
849 | +/* 024 */ | |
850 | +#define md_need_resched(tsk) ((tsk)->need_resched) | |
851 | + | |
852 | +/* 025 */ | |
853 | +#define md_spinlock_t spinlock_t | |
854 | +#define MD_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED | |
855 | + | |
856 | +#define md_spin_lock_irq spin_lock_irq | |
857 | +#define md_spin_unlock_irq spin_unlock_irq | |
858 | +#define md_spin_unlock_irqrestore spin_unlock_irqrestore | |
859 | +#define md_spin_lock_irqsave spin_lock_irqsave | |
860 | + | |
861 | +/* END */ | |
862 | + | |
863 | +#endif | |
864 | + | |
865 | +#endif /* _MD_COMPATIBLE_H */ | |
866 | + | |
867 | --- linux/include/linux/raid/md_k.h.orig Fri Nov 23 11:18:16 2001 | |
868 | +++ linux/include/linux/raid/md_k.h Fri Nov 23 11:18:16 2001 | |
869 | @@ -0,0 +1,338 @@ | |
870 | +/* | |
871 | + md_k.h : kernel internal structure of the Linux MD driver | |
872 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
873 | + | |
874 | + This program is free software; you can redistribute it and/or modify | |
875 | + it under the terms of the GNU General Public License as published by | |
876 | + the Free Software Foundation; either version 2, or (at your option) | |
877 | + any later version. | |
878 | + | |
879 | + You should have received a copy of the GNU General Public License | |
880 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
881 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
882 | +*/ | |
883 | + | |
884 | +#ifndef _MD_K_H | |
885 | +#define _MD_K_H | |
886 | + | |
887 | +#define MD_RESERVED 0UL | |
888 | +#define LINEAR 1UL | |
889 | +#define STRIPED 2UL | |
890 | +#define RAID0 STRIPED | |
891 | +#define RAID1 3UL | |
892 | +#define RAID5 4UL | |
893 | +#define TRANSLUCENT 5UL | |
894 | +#define HSM 6UL | |
895 | +#define MAX_PERSONALITY 7UL | |
896 | + | |
897 | +extern inline int pers_to_level (int pers) | |
898 | +{ | |
899 | + switch (pers) { | |
900 | + case HSM: return -3; | |
901 | + case TRANSLUCENT: return -2; | |
902 | + case LINEAR: return -1; | |
903 | + case RAID0: return 0; | |
904 | + case RAID1: return 1; | |
905 | + case RAID5: return 5; | |
906 | + } | |
907 | + panic("pers_to_level()"); | |
908 | +} | |
909 | + | |
910 | +extern inline int level_to_pers (int level) | |
911 | +{ | |
912 | + switch (level) { | |
913 | + case -3: return HSM; | |
914 | + case -2: return TRANSLUCENT; | |
915 | + case -1: return LINEAR; | |
916 | + case 0: return RAID0; | |
917 | + case 1: return RAID1; | |
918 | + case 4: | |
919 | + case 5: return RAID5; | |
920 | + } | |
921 | + return MD_RESERVED; | |
922 | +} | |
923 | + | |
924 | +typedef struct mddev_s mddev_t; | |
925 | +typedef struct mdk_rdev_s mdk_rdev_t; | |
926 | + | |
927 | +#if (MINORBITS != 8) | |
928 | +#error MD does not handle bigger kdev yet | |
929 | +#endif | |
930 | + | |
931 | +#define MAX_REAL 12 /* Max number of disks per md dev */ | |
932 | +#define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */ | |
933 | + | |
934 | +/* | |
935 | + * Maps a kdev to an mddev/subdev. How 'data' is handled is up to | |
936 | + * the personality. (eg. HSM uses this to identify individual LVs) | |
937 | + */ | |
938 | +typedef struct dev_mapping_s { | |
939 | + mddev_t *mddev; | |
940 | + void *data; | |
941 | +} dev_mapping_t; | |
942 | + | |
943 | +extern dev_mapping_t mddev_map [MAX_MD_DEVS]; | |
944 | + | |
945 | +extern inline mddev_t * kdev_to_mddev (kdev_t dev) | |
946 | +{ | |
947 | + return mddev_map[MINOR(dev)].mddev; | |
948 | +} | |
949 | + | |
950 | +/* | |
951 | + * options passed in raidrun: | |
952 | + */ | |
953 | + | |
954 | +#define MAX_CHUNK_SIZE (4096*1024) | |
955 | + | |
956 | +/* | |
957 | + * default readahead | |
958 | + */ | |
959 | +#define MD_READAHEAD (256 * 512) | |
960 | + | |
961 | +extern inline int disk_faulty(mdp_disk_t * d) | |
962 | +{ | |
963 | + return d->state & (1 << MD_DISK_FAULTY); | |
964 | +} | |
965 | + | |
966 | +extern inline int disk_active(mdp_disk_t * d) | |
967 | +{ | |
968 | + return d->state & (1 << MD_DISK_ACTIVE); | |
969 | +} | |
970 | + | |
971 | +extern inline int disk_sync(mdp_disk_t * d) | |
972 | +{ | |
973 | + return d->state & (1 << MD_DISK_SYNC); | |
974 | +} | |
975 | + | |
976 | +extern inline int disk_spare(mdp_disk_t * d) | |
977 | +{ | |
978 | + return !disk_sync(d) && !disk_active(d) && !disk_faulty(d); | |
979 | +} | |
980 | + | |
981 | +extern inline int disk_removed(mdp_disk_t * d) | |
982 | +{ | |
983 | + return d->state & (1 << MD_DISK_REMOVED); | |
984 | +} | |
985 | + | |
986 | +extern inline void mark_disk_faulty(mdp_disk_t * d) | |
987 | +{ | |
988 | + d->state |= (1 << MD_DISK_FAULTY); | |
989 | +} | |
990 | + | |
991 | +extern inline void mark_disk_active(mdp_disk_t * d) | |
992 | +{ | |
993 | + d->state |= (1 << MD_DISK_ACTIVE); | |
994 | +} | |
995 | + | |
996 | +extern inline void mark_disk_sync(mdp_disk_t * d) | |
997 | +{ | |
998 | + d->state |= (1 << MD_DISK_SYNC); | |
999 | +} | |
1000 | + | |
1001 | +extern inline void mark_disk_spare(mdp_disk_t * d) | |
1002 | +{ | |
1003 | + d->state = 0; | |
1004 | +} | |
1005 | + | |
1006 | +extern inline void mark_disk_removed(mdp_disk_t * d) | |
1007 | +{ | |
1008 | + d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED); | |
1009 | +} | |
1010 | + | |
1011 | +extern inline void mark_disk_inactive(mdp_disk_t * d) | |
1012 | +{ | |
1013 | + d->state &= ~(1 << MD_DISK_ACTIVE); | |
1014 | +} | |
1015 | + | |
1016 | +extern inline void mark_disk_nonsync(mdp_disk_t * d) | |
1017 | +{ | |
1018 | + d->state &= ~(1 << MD_DISK_SYNC); | |
1019 | +} | |
1020 | + | |
1021 | +/* | |
1022 | + * MD's 'extended' device | |
1023 | + */ | |
1024 | +struct mdk_rdev_s | |
1025 | +{ | |
1026 | + struct md_list_head same_set; /* RAID devices within the same set */ | |
1027 | + struct md_list_head all; /* all RAID devices */ | |
1028 | + struct md_list_head pending; /* undetected RAID devices */ | |
1029 | + | |
1030 | + kdev_t dev; /* Device number */ | |
1031 | + kdev_t old_dev; /* "" when it was last imported */ | |
1032 | + int size; /* Device size (in blocks) */ | |
1033 | + mddev_t *mddev; /* RAID array if running */ | |
1034 | + unsigned long last_events; /* IO event timestamp */ | |
1035 | + | |
1036 | + struct inode *inode; /* Lock inode */ | |
1037 | + struct file filp; /* Lock file */ | |
1038 | + | |
1039 | + mdp_super_t *sb; | |
1040 | + int sb_offset; | |
1041 | + | |
1042 | + int faulty; /* if faulty do not issue IO requests */ | |
1043 | + int desc_nr; /* descriptor index in the superblock */ | |
1044 | +}; | |
1045 | + | |
1046 | + | |
1047 | +/* | |
1048 | + * disk operations in a working array: | |
1049 | + */ | |
1050 | +#define DISKOP_SPARE_INACTIVE 0 | |
1051 | +#define DISKOP_SPARE_WRITE 1 | |
1052 | +#define DISKOP_SPARE_ACTIVE 2 | |
1053 | +#define DISKOP_HOT_REMOVE_DISK 3 | |
1054 | +#define DISKOP_HOT_ADD_DISK 4 | |
1055 | + | |
1056 | +typedef struct mdk_personality_s mdk_personality_t; | |
1057 | + | |
1058 | +struct mddev_s | |
1059 | +{ | |
1060 | + void *private; | |
1061 | + mdk_personality_t *pers; | |
1062 | + int __minor; | |
1063 | + mdp_super_t *sb; | |
1064 | + int nb_dev; | |
1065 | + struct md_list_head disks; | |
1066 | + int sb_dirty; | |
1067 | + mdu_param_t param; | |
1068 | + int ro; | |
1069 | + unsigned int curr_resync; | |
1070 | + unsigned long resync_start; | |
1071 | + char *name; | |
1072 | + int recovery_running; | |
1073 | + struct semaphore reconfig_sem; | |
1074 | + struct semaphore recovery_sem; | |
1075 | + struct semaphore resync_sem; | |
1076 | + struct md_list_head all_mddevs; | |
1077 | +}; | |
1078 | + | |
1079 | +struct mdk_personality_s | |
1080 | +{ | |
1081 | + char *name; | |
1082 | + int (*map)(mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
1083 | + unsigned long *rsector, unsigned long size); | |
1084 | + int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh); | |
1085 | + void (*end_request)(struct buffer_head * bh, int uptodate); | |
1086 | + int (*run)(mddev_t *mddev); | |
1087 | + int (*stop)(mddev_t *mddev); | |
1088 | + int (*status)(char *page, mddev_t *mddev); | |
1089 | + int (*ioctl)(struct inode *inode, struct file *file, | |
1090 | + unsigned int cmd, unsigned long arg); | |
1091 | + int max_invalid_dev; | |
1092 | + int (*error_handler)(mddev_t *mddev, kdev_t dev); | |
1093 | + | |
1094 | +/* | |
1095 | + * Some personalities (RAID-1, RAID-5) can have disks hot-added and | |
1096 | + * hot-removed. Hot removal is different from failure. (failure marks | |
1097 | + * a disk inactive, but the disk is still part of the array) The interface | |
1098 | + * to such operations is the 'pers->diskop()' function, can be NULL. | |
1099 | + * | |
1100 | + * the diskop function can change the pointer pointing to the incoming | |
1101 | + * descriptor, but must do so very carefully. (currently only | |
1102 | + * SPARE_ACTIVE expects such a change) | |
1103 | + */ | |
1104 | + int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state); | |
1105 | + | |
1106 | + int (*stop_resync)(mddev_t *mddev); | |
1107 | + int (*restart_resync)(mddev_t *mddev); | |
1108 | +}; | |
1109 | + | |
1110 | + | |
1111 | +/* | |
1112 | + * Currently we index md_array directly, based on the minor | |
1113 | + * number. This will have to change to dynamic allocation | |
1114 | + * once we start supporting partitioning of md devices. | |
1115 | + */ | |
1116 | +extern inline int mdidx (mddev_t * mddev) | |
1117 | +{ | |
1118 | + return mddev->__minor; | |
1119 | +} | |
1120 | + | |
1121 | +extern inline kdev_t mddev_to_kdev(mddev_t * mddev) | |
1122 | +{ | |
1123 | + return MKDEV(MD_MAJOR, mdidx(mddev)); | |
1124 | +} | |
1125 | + | |
1126 | +extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev); | |
1127 | +extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); | |
1128 | + | |
1129 | +/* | |
1130 | + * iterates through some rdev ringlist. It's safe to remove the | |
1131 | + * current 'rdev'. Don't touch 'tmp' though. | |
1132 | + */ | |
1133 | +#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp) \ | |
1134 | + \ | |
1135 | + for (tmp = head.next; \ | |
1136 | + rdev = md_list_entry(tmp, mdk_rdev_t, field), \ | |
1137 | + tmp = tmp->next, tmp->prev != &head \ | |
1138 | + ; ) | |
1139 | +/* | |
1140 | + * iterates through the 'same array disks' ringlist | |
1141 | + */ | |
1142 | +#define ITERATE_RDEV(mddev,rdev,tmp) \ | |
1143 | + ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp) | |
1144 | + | |
1145 | +/* | |
1146 | + * Same as above, but assumes that the device has rdev->desc_nr numbered | |
1147 | + * from 0 to mddev->nb_dev, and iterates through rdevs in ascending order. | |
1148 | + */ | |
1149 | +#define ITERATE_RDEV_ORDERED(mddev,rdev,i) \ | |
1150 | + for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++) | |
1151 | + | |
1152 | + | |
1153 | +/* | |
1154 | + * Iterates through all 'RAID managed disks' | |
1155 | + */ | |
1156 | +#define ITERATE_RDEV_ALL(rdev,tmp) \ | |
1157 | + ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp) | |
1158 | + | |
1159 | +/* | |
1160 | + * Iterates through 'pending RAID disks' | |
1161 | + */ | |
1162 | +#define ITERATE_RDEV_PENDING(rdev,tmp) \ | |
1163 | + ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp) | |
1164 | + | |
1165 | +/* | |
1166 | + * iterates through all used mddevs in the system. | |
1167 | + */ | |
1168 | +#define ITERATE_MDDEV(mddev,tmp) \ | |
1169 | + \ | |
1170 | + for (tmp = all_mddevs.next; \ | |
1171 | + mddev = md_list_entry(tmp, mddev_t, all_mddevs), \ | |
1172 | + tmp = tmp->next, tmp->prev != &all_mddevs \ | |
1173 | + ; ) | |
1174 | + | |
1175 | +extern inline int lock_mddev (mddev_t * mddev) | |
1176 | +{ | |
1177 | + return down_interruptible(&mddev->reconfig_sem); | |
1178 | +} | |
1179 | + | |
1180 | +extern inline void unlock_mddev (mddev_t * mddev) | |
1181 | +{ | |
1182 | + up(&mddev->reconfig_sem); | |
1183 | +} | |
1184 | + | |
1185 | +#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \ | |
1186 | + x = y; y = __tmp; } while (0) | |
1187 | + | |
1188 | +typedef struct mdk_thread_s { | |
1189 | + void (*run) (void *data); | |
1190 | + void *data; | |
1191 | + struct wait_queue *wqueue; | |
1192 | + unsigned long flags; | |
1193 | + struct semaphore *sem; | |
1194 | + struct task_struct *tsk; | |
1195 | + const char *name; | |
1196 | +} mdk_thread_t; | |
1197 | + | |
1198 | +#define THREAD_WAKEUP 0 | |
1199 | + | |
1200 | +typedef struct dev_name_s { | |
1201 | + struct md_list_head list; | |
1202 | + kdev_t dev; | |
1203 | + char name [MAX_DISKNAME_LEN]; | |
1204 | +} dev_name_t; | |
1205 | + | |
1206 | +#endif /* _MD_K_H */ | |
1207 | + | |
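The ITERATE_* macros above are meant to read like ordinary for loops, with the reconfiguration semaphore held around any walk that could race with a reconfiguration. A short sketch (dump_array is a made-up helper):

/* sketch only: walk the member disks of one array */
static void dump_array(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	struct md_list_head *tmp;

	if (lock_mddev(mddev))		/* interruptible; give up if signalled */
		return;
	ITERATE_RDEV(mddev, rdev, tmp) {
		printk("md%d: %s%s\n", mdidx(mddev), partition_name(rdev->dev),
		       rdev->faulty ? " (faulty)" : "");
	}
	unlock_mddev(mddev);
}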
1208 | --- linux/include/linux/raid/md_p.h.orig Fri Nov 23 11:18:16 2001 | |
1209 | +++ linux/include/linux/raid/md_p.h Fri Nov 23 11:18:16 2001 | |
1210 | @@ -0,0 +1,161 @@ | |
1211 | +/* | |
1212 | + md_p.h : physical layout of Linux RAID devices | |
1213 | + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman | |
1214 | + | |
1215 | + This program is free software; you can redistribute it and/or modify | |
1216 | + it under the terms of the GNU General Public License as published by | |
1217 | + the Free Software Foundation; either version 2, or (at your option) | |
1218 | + any later version. | |
1219 | + | |
1220 | + You should have received a copy of the GNU General Public License | |
1221 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
1222 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
1223 | +*/ | |
1224 | + | |
1225 | +#ifndef _MD_P_H | |
1226 | +#define _MD_P_H | |
1227 | + | |
1228 | +/* | |
1229 | + * RAID superblock. | |
1230 | + * | |
1231 | + * The RAID superblock maintains some statistics on each RAID configuration. | |
1232 | + * Each real device in the RAID set contains it near the end of the device. | |
1233 | + * Some of the ideas are copied from the ext2fs implementation. | |
1234 | + * | |
1235 | + * We currently use 4096 bytes as follows: | |
1236 | + * | |
1237 | + * word offset function | |
1238 | + * | |
1239 | + * 0 - 31 Constant generic RAID device information. | |
1240 | + * 32 - 63 Generic state information. | |
1241 | + * 64 - 127 Personality specific information. | |
1242 | + * 128 - 511 12 32-words descriptors of the disks in the raid set. | |
1243 | + * 512 - 911 Reserved. | |
1244 | + * 912 - 1023 Disk specific descriptor. | |
1245 | + */ | |
1246 | + | |
1247 | +/* | |
1248 | + * If x is the real device size in bytes, we return an apparent size of: | |
1249 | + * | |
1250 | + * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES | |
1251 | + * | |
1252 | + * and place the 4kB superblock at offset y. | |
1253 | + */ | |
1254 | +#define MD_RESERVED_BYTES (64 * 1024) | |
1255 | +#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) | |
1256 | +#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE) | |
1257 | + | |
1258 | +#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) | |
1259 | +#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS) | |
1260 | + | |
1261 | +#define MD_SB_BYTES 4096 | |
1262 | +#define MD_SB_WORDS (MD_SB_BYTES / 4) | |
1263 | +#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE) | |
1264 | +#define MD_SB_SECTORS (MD_SB_BYTES / 512) | |
1265 | + | |
1266 | +/* | |
1267 | + * The following are counted in 32-bit words | |
1268 | + */ | |
1269 | +#define MD_SB_GENERIC_OFFSET 0 | |
1270 | +#define MD_SB_PERSONALITY_OFFSET 64 | |
1271 | +#define MD_SB_DISKS_OFFSET 128 | |
1272 | +#define MD_SB_DESCRIPTOR_OFFSET 992 | |
1273 | + | |
1274 | +#define MD_SB_GENERIC_CONSTANT_WORDS 32 | |
1275 | +#define MD_SB_GENERIC_STATE_WORDS 32 | |
1276 | +#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) | |
1277 | +#define MD_SB_PERSONALITY_WORDS 64 | |
1278 | +#define MD_SB_DISKS_WORDS 384 | |
1279 | +#define MD_SB_DESCRIPTOR_WORDS 32 | |
1280 | +#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) | |
1281 | +#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) | |
1282 | +#define MD_SB_DISKS (MD_SB_DISKS_WORDS / MD_SB_DESCRIPTOR_WORDS) | |
1283 | + | |
1284 | +/* | |
1285 | + * Device "operational" state bits | |
1286 | + */ | |
1287 | +#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ | |
1288 | +#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ | |
1289 | +#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ | |
1290 | +#define MD_DISK_REMOVED 3 /* disk has been removed from the raid set */ | |
1291 | + | |
1292 | +typedef struct mdp_device_descriptor_s { | |
1293 | + __u32 number; /* 0 Device number in the entire set */ | |
1294 | + __u32 major; /* 1 Device major number */ | |
1295 | + __u32 minor; /* 2 Device minor number */ | |
1296 | + __u32 raid_disk; /* 3 The role of the device in the raid set */ | |
1297 | + __u32 state; /* 4 Operational state */ | |
1298 | + __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; | |
1299 | +} mdp_disk_t; | |
1300 | + | |
1301 | +#define MD_SB_MAGIC 0xa92b4efc | |
1302 | + | |
1303 | +/* | |
1304 | + * Superblock state bits | |
1305 | + */ | |
1306 | +#define MD_SB_CLEAN 0 | |
1307 | +#define MD_SB_ERRORS 1 | |
1308 | + | |
1309 | +typedef struct mdp_superblock_s { | |
1310 | + /* | |
1311 | + * Constant generic information | |
1312 | + */ | |
1313 | + __u32 md_magic; /* 0 MD identifier */ | |
1314 | + __u32 major_version; /* 1 major version to which the set conforms */ | |
1315 | + __u32 minor_version; /* 2 minor version ... */ | |
1316 | + __u32 patch_version; /* 3 patchlevel version ... */ | |
1317 | + __u32 gvalid_words; /* 4 Number of used words in this section */ | |
1318 | + __u32 set_uuid0; /* 5 Raid set identifier */ | |
1319 | + __u32 ctime; /* 6 Creation time */ | |
1320 | + __u32 level; /* 7 Raid personality */ | |
1321 | + __u32 size; /* 8 Apparent size of each individual disk */ | |
1322 | + __u32 nr_disks; /* 9 total disks in the raid set */ | |
1323 | + __u32 raid_disks; /* 10 disks in a fully functional raid set */ | |
1324 | + __u32 md_minor; /* 11 preferred MD minor device number */ | |
1325 | + __u32 not_persistent; /* 12 does it have a persistent superblock */ | |
1326 | + __u32 set_uuid1; /* 13 Raid set identifier #2 */ | |
1327 | + __u32 set_uuid2; /* 14 Raid set identifier #3 */ | |
1328 | + __u32 set_uuid3; /* 15 Raid set identifier #4 */ | |
1329 | + __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; | |
1330 | + | |
1331 | + /* | |
1332 | + * Generic state information | |
1333 | + */ | |
1334 | + __u32 utime; /* 0 Superblock update time */ | |
1335 | + __u32 state; /* 1 State bits (clean, ...) */ | |
1336 | + __u32 active_disks; /* 2 Number of currently active disks */ | |
1337 | + __u32 working_disks; /* 3 Number of working disks */ | |
1338 | + __u32 failed_disks; /* 4 Number of failed disks */ | |
1339 | + __u32 spare_disks; /* 5 Number of spare disks */ | |
1340 | + __u32 sb_csum; /* 6 checksum of the whole superblock */ | |
1341 | + __u64 events; /* 7 number of superblock updates (64-bit!) */ | |
1342 | + __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9]; | |
1343 | + | |
1344 | + /* | |
1345 | + * Personality information | |
1346 | + */ | |
1347 | + __u32 layout; /* 0 the array's physical layout */ | |
1348 | + __u32 chunk_size; /* 1 chunk size in bytes */ | |
1349 | + __u32 root_pv; /* 2 LV root PV */ | |
1350 | + __u32 root_block; /* 3 LV root block */ | |
1351 | + __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; | |
1352 | + | |
1353 | + /* | |
1354 | + * Disks information | |
1355 | + */ | |
1356 | + mdp_disk_t disks[MD_SB_DISKS]; | |
1357 | + | |
1358 | + /* | |
1359 | + * Reserved | |
1360 | + */ | |
1361 | + __u32 reserved[MD_SB_RESERVED_WORDS]; | |
1362 | + | |
1363 | + /* | |
1364 | + * Active descriptor | |
1365 | + */ | |
1366 | + mdp_disk_t this_disk; | |
1367 | + | |
1368 | +} mdp_super_t; | |
1369 | + | |
1370 | +#endif /* _MD_P_H */ | |
1371 | + | |
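As a worked example of the MD_NEW_SIZE_* formula above: a member device of 10,000,100 sectors is first rounded down to a 64 KiB boundary (10,000,000 sectors), then one 64 KiB reservation is subtracted, so the array uses 9,999,872 sectors and the superblock starts at exactly that sector. The same arithmetic, compilable in userspace:

#include <stdio.h>

#define MD_RESERVED_SECTORS	((64 * 1024) / 512)	/* 128 */
#define MD_NEW_SIZE_SECTORS(x)	((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)

int main(void)
{
	unsigned long sectors  = 10000100;		/* example device size */
	unsigned long apparent = MD_NEW_SIZE_SECTORS(sectors);

	printf("apparent size / superblock offset: %lu sectors\n", apparent);	/* 9999872 */
	return 0;
}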
1372 | --- linux/include/linux/raid/md_u.h.orig Fri Nov 23 11:18:16 2001 | |
1373 | +++ linux/include/linux/raid/md_u.h Fri Nov 23 11:18:16 2001 | |
1374 | @@ -0,0 +1,115 @@ | |
1375 | +/* | |
1376 | + md_u.h : user <=> kernel API between Linux raidtools and RAID drivers | |
1377 | + Copyright (C) 1998 Ingo Molnar | |
1378 | + | |
1379 | + This program is free software; you can redistribute it and/or modify | |
1380 | + it under the terms of the GNU General Public License as published by | |
1381 | + the Free Software Foundation; either version 2, or (at your option) | |
1382 | + any later version. | |
1383 | + | |
1384 | + You should have received a copy of the GNU General Public License | |
1385 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
1386 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
1387 | +*/ | |
1388 | + | |
1389 | +#ifndef _MD_U_H | |
1390 | +#define _MD_U_H | |
1391 | + | |
1392 | +/* ioctls */ | |
1393 | + | |
1394 | +/* status */ | |
1395 | +#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) | |
1396 | +#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) | |
1397 | +#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) | |
1398 | +#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) | |
1399 | + | |
1400 | +/* configuration */ | |
1401 | +#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) | |
1402 | +#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) | |
1403 | +#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) | |
1404 | +#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) | |
1405 | +#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) | |
1406 | +#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) | |
1407 | +#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) | |
1408 | +#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) | |
1409 | +#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) | |
1410 | +#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) | |
1411 | + | |
1412 | +/* usage */ | |
1413 | +#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) | |
1414 | +#define START_ARRAY _IO (MD_MAJOR, 0x31) | |
1415 | +#define STOP_ARRAY _IO (MD_MAJOR, 0x32) | |
1416 | +#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) | |
1417 | +#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) | |
1418 | + | |
1419 | +typedef struct mdu_version_s { | |
1420 | + int major; | |
1421 | + int minor; | |
1422 | + int patchlevel; | |
1423 | +} mdu_version_t; | |
1424 | + | |
1425 | +typedef struct mdu_array_info_s { | |
1426 | + /* | |
1427 | + * Generic constant information | |
1428 | + */ | |
1429 | + int major_version; | |
1430 | + int minor_version; | |
1431 | + int patch_version; | |
1432 | + int ctime; | |
1433 | + int level; | |
1434 | + int size; | |
1435 | + int nr_disks; | |
1436 | + int raid_disks; | |
1437 | + int md_minor; | |
1438 | + int not_persistent; | |
1439 | + | |
1440 | + /* | |
1441 | + * Generic state information | |
1442 | + */ | |
1443 | + int utime; /* 0 Superblock update time */ | |
1444 | + int state; /* 1 State bits (clean, ...) */ | |
1445 | + int active_disks; /* 2 Number of currently active disks */ | |
1446 | + int working_disks; /* 3 Number of working disks */ | |
1447 | + int failed_disks; /* 4 Number of failed disks */ | |
1448 | + int spare_disks; /* 5 Number of spare disks */ | |
1449 | + | |
1450 | + /* | |
1451 | + * Personality information | |
1452 | + */ | |
1453 | + int layout; /* 0 the array's physical layout */ | |
1454 | + int chunk_size; /* 1 chunk size in bytes */ | |
1455 | + | |
1456 | +} mdu_array_info_t; | |
1457 | + | |
1458 | +typedef struct mdu_disk_info_s { | |
1459 | + /* | |
1460 | + * configuration/status of one particular disk | |
1461 | + */ | |
1462 | + int number; | |
1463 | + int major; | |
1464 | + int minor; | |
1465 | + int raid_disk; | |
1466 | + int state; | |
1467 | + | |
1468 | +} mdu_disk_info_t; | |
1469 | + | |
1470 | +typedef struct mdu_start_info_s { | |
1471 | + /* | |
1472 | + * configuration/status of one particular disk | |
1473 | + */ | |
1474 | + int major; | |
1475 | + int minor; | |
1476 | + int raid_disk; | |
1477 | + int state; | |
1478 | + | |
1479 | +} mdu_start_info_t; | |
1480 | + | |
1481 | +typedef struct mdu_param_s | |
1482 | +{ | |
1483 | + int personality; /* 1,2,3,4 */ | |
1484 | + int chunk_size; /* in bytes */ | |
1485 | + int max_fault; /* unused for now */ | |
1486 | +} mdu_param_t; | |
1487 | + | |
1488 | +#endif /* _MD_U_H */ | |
1489 | + | |
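These ioctls are the whole user/kernel contract that raidtools builds on. A minimal sketch of a userspace query against a running array (/dev/md0 is just an example node, and <linux/major.h> supplies the MD_MAJOR used by the _IO* macros):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/major.h>
#include <linux/raid/md_u.h>

int main(void)
{
	mdu_array_info_t info;
	int fd = open("/dev/md0", O_RDONLY);

	if (fd < 0 || ioctl(fd, GET_ARRAY_INFO, &info) < 0) {
		perror("GET_ARRAY_INFO");
		return 1;
	}
	printf("level %d: %d raid disks, %d active, %d working, %d failed, %d spare\n",
	       info.level, info.raid_disks, info.active_disks,
	       info.working_disks, info.failed_disks, info.spare_disks);
	close(fd);
	return 0;
}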
1490 | --- linux/include/linux/raid/raid0.h.orig Fri Nov 23 11:18:16 2001 | |
1491 | +++ linux/include/linux/raid/raid0.h Fri Nov 23 11:18:16 2001 | |
1492 | @@ -0,0 +1,33 @@ | |
1493 | +#ifndef _RAID0_H | |
1494 | +#define _RAID0_H | |
1495 | + | |
1496 | +#include <linux/raid/md.h> | |
1497 | + | |
1498 | +struct strip_zone | |
1499 | +{ | |
1500 | + int zone_offset; /* Zone offset in md_dev */ | |
1501 | + int dev_offset; /* Zone offset in real dev */ | |
1502 | + int size; /* Zone size */ | |
1503 | + int nb_dev; /* # of devices attached to the zone */ | |
1504 | + mdk_rdev_t *dev[MAX_REAL]; /* Devices attached to the zone */ | |
1505 | +}; | |
1506 | + | |
1507 | +struct raid0_hash | |
1508 | +{ | |
1509 | + struct strip_zone *zone0, *zone1; | |
1510 | +}; | |
1511 | + | |
1512 | +struct raid0_private_data | |
1513 | +{ | |
1514 | + struct raid0_hash *hash_table; /* Dynamically allocated */ | |
1515 | + struct strip_zone *strip_zone; /* This one too */ | |
1516 | + int nr_strip_zones; | |
1517 | + struct strip_zone *smallest; | |
1518 | + int nr_zones; | |
1519 | +}; | |
1520 | + | |
1521 | +typedef struct raid0_private_data raid0_conf_t; | |
1522 | + | |
1523 | +#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private) | |
1524 | + | |
1525 | +#endif | |
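The strip_zone fields above carry exactly the numbers needed to stripe chunks round-robin across a zone's members. A sketch of that mapping; raid0_map_sketch is a made-up name, every quantity is treated as sectors here, and the chunk size is passed in rather than read from the superblock:

/* sketch only: map an array-relative sector to (member disk, sector on it) */
static void raid0_map_sketch(struct strip_zone *zone, unsigned long sector,
			     unsigned long chunk_sects,
			     mdk_rdev_t **rdev, unsigned long *rsector)
{
	unsigned long in_zone = sector - zone->zone_offset;	/* offset inside the zone */
	unsigned long chunk   = in_zone / chunk_sects;		/* which chunk of the zone */

	*rdev    = zone->dev[chunk % zone->nb_dev];		/* round-robin member disk */
	*rsector = zone->dev_offset
		 + (chunk / zone->nb_dev) * chunk_sects		/* whole stripes before it */
		 + (in_zone % chunk_sects);			/* offset inside the chunk */
}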
1526 | --- linux/include/linux/raid/raid1.h.orig Fri Nov 23 11:18:16 2001 | |
1527 | +++ linux/include/linux/raid/raid1.h Fri Nov 23 11:18:16 2001 | |
1528 | @@ -0,0 +1,64 @@ | |
1529 | +#ifndef _RAID1_H | |
1530 | +#define _RAID1_H | |
1531 | + | |
1532 | +#include <linux/raid/md.h> | |
1533 | + | |
1534 | +struct mirror_info { | |
1535 | + int number; | |
1536 | + int raid_disk; | |
1537 | + kdev_t dev; | |
1538 | + int next; | |
1539 | + int sect_limit; | |
1540 | + | |
1541 | + /* | |
1542 | + * State bits: | |
1543 | + */ | |
1544 | + int operational; | |
1545 | + int write_only; | |
1546 | + int spare; | |
1547 | + | |
1548 | + int used_slot; | |
1549 | +}; | |
1550 | + | |
1551 | +struct raid1_private_data { | |
1552 | + mddev_t *mddev; | |
1553 | + struct mirror_info mirrors[MD_SB_DISKS]; | |
1554 | + int nr_disks; | |
1555 | + int raid_disks; | |
1556 | + int working_disks; | |
1557 | + int last_used; | |
1558 | + unsigned long next_sect; | |
1559 | + int sect_count; | |
1560 | + mdk_thread_t *thread, *resync_thread; | |
1561 | + int resync_mirrors; | |
1562 | + struct mirror_info *spare; | |
1563 | +}; | |
1564 | + | |
1565 | +typedef struct raid1_private_data raid1_conf_t; | |
1566 | + | |
1567 | +/* | |
1568 | + * this is the only point in the RAID code where we violate | |
1569 | + * C type safety. mddev->private is an 'opaque' pointer. | |
1570 | + */ | |
1571 | +#define mddev_to_conf(mddev) ((raid1_conf_t *) mddev->private) | |
1572 | + | |
1573 | +/* | |
1574 | + * this is our 'private' 'collective' RAID1 buffer head. | |
1575 | + * it contains information about what kind of IO operations were started | |
1576 | + * for this RAID1 operation, and about their status: | |
1577 | + */ | |
1578 | + | |
1579 | +struct raid1_bh { | |
1580 | + atomic_t remaining; /* 'have we finished' count, | |
1581 | + * used from IRQ handlers | |
1582 | + */ | |
1583 | + int cmd; | |
1584 | + unsigned long state; | |
1585 | + mddev_t *mddev; | |
1586 | + struct buffer_head *master_bh; | |
1587 | + struct buffer_head *mirror_bh [MD_SB_DISKS]; | |
1588 | + struct buffer_head bh_req; | |
1589 | + struct buffer_head *next_retry; | |
1590 | +}; | |
1591 | + | |
1592 | +#endif | |
1593 | --- linux/include/linux/raid/raid5.h.orig Fri Nov 23 11:18:16 2001 | |
1594 | +++ linux/include/linux/raid/raid5.h Fri Nov 23 11:18:16 2001 | |
1595 | @@ -0,0 +1,113 @@ | |
1596 | +#ifndef _RAID5_H | |
1597 | +#define _RAID5_H | |
1598 | + | |
1599 | +#include <linux/raid/md.h> | |
1600 | +#include <linux/raid/xor.h> | |
1601 | + | |
1602 | +struct disk_info { | |
1603 | + kdev_t dev; | |
1604 | + int operational; | |
1605 | + int number; | |
1606 | + int raid_disk; | |
1607 | + int write_only; | |
1608 | + int spare; | |
1609 | + int used_slot; | |
1610 | +}; | |
1611 | + | |
1612 | +struct stripe_head { | |
1613 | + md_spinlock_t stripe_lock; | |
1614 | + struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ | |
1615 | + struct stripe_head *free_next; /* pool of free sh's */ | |
1616 | + struct buffer_head *buffer_pool; /* pool of free buffers */ | |
1617 | + struct buffer_head *bh_pool; /* pool of free bh's */ | |
1618 | + struct raid5_private_data *raid_conf; | |
1619 | + struct buffer_head *bh_old[MD_SB_DISKS]; /* disk image */ | |
1620 | + struct buffer_head *bh_new[MD_SB_DISKS]; /* buffers of the MD device (present in buffer cache) */ | |
1621 | + struct buffer_head *bh_copy[MD_SB_DISKS]; /* copy on write of bh_new (bh_new can change from under us) */ | |
1622 | + struct buffer_head *bh_req[MD_SB_DISKS]; /* copy of bh_new (only the buffer heads), queued to the lower levels */ | |
1623 | + int cmd_new[MD_SB_DISKS]; /* READ/WRITE for new */ | |
1624 | + int new[MD_SB_DISKS]; /* buffer added since the last handle_stripe() */ | |
1625 | + unsigned long sector; /* sector of this row */ | |
1626 | + int size; /* buffers size */ | |
1627 | + int pd_idx; /* parity disk index */ | |
1628 | + atomic_t nr_pending; /* nr of pending cmds */ | |
1629 | + unsigned long state; /* state flags */ | |
1630 | + int cmd; /* stripe cmd */ | |
1631 | + int count; /* nr of waiters */ | |
1632 | + int write_method; /* reconstruct-write / read-modify-write */ | |
1633 | + int phase; /* PHASE_BEGIN, ..., PHASE_COMPLETE */ | |
1634 | + struct wait_queue *wait; /* processes waiting for this stripe */ | |
1635 | +}; | |
1636 | + | |
1637 | +/* | |
1638 | + * Phase | |
1639 | + */ | |
1640 | +#define PHASE_BEGIN 0 | |
1641 | +#define PHASE_READ_OLD 1 | |
1642 | +#define PHASE_WRITE 2 | |
1643 | +#define PHASE_READ 3 | |
1644 | +#define PHASE_COMPLETE 4 | |
1645 | + | |
1646 | +/* | |
1647 | + * Write method | |
1648 | + */ | |
1649 | +#define METHOD_NONE 0 | |
1650 | +#define RECONSTRUCT_WRITE 1 | |
1651 | +#define READ_MODIFY_WRITE 2 | |
1652 | + | |
1653 | +/* | |
1654 | + * Stripe state | |
1655 | + */ | |
1656 | +#define STRIPE_LOCKED 0 | |
1657 | +#define STRIPE_ERROR 1 | |
1658 | + | |
1659 | +/* | |
1660 | + * Stripe commands | |
1661 | + */ | |
1662 | +#define STRIPE_NONE 0 | |
1663 | +#define STRIPE_WRITE 1 | |
1664 | +#define STRIPE_READ 2 | |
1665 | + | |
1666 | +struct raid5_private_data { | |
1667 | + struct stripe_head **stripe_hashtbl; | |
1668 | + mddev_t *mddev; | |
1669 | + mdk_thread_t *thread, *resync_thread; | |
1670 | + struct disk_info disks[MD_SB_DISKS]; | |
1671 | + struct disk_info *spare; | |
1672 | + int buffer_size; | |
1673 | + int chunk_size, level, algorithm; | |
1674 | + int raid_disks, working_disks, failed_disks; | |
1675 | + int sector_count; | |
1676 | + unsigned long next_sector; | |
1677 | + atomic_t nr_handle; | |
1678 | + struct stripe_head *next_free_stripe; | |
1679 | + int nr_stripes; | |
1680 | + int resync_parity; | |
1681 | + int max_nr_stripes; | |
1682 | + int clock; | |
1683 | + int nr_hashed_stripes; | |
1684 | + int nr_locked_stripes; | |
1685 | + int nr_pending_stripes; | |
1686 | + int nr_cached_stripes; | |
1687 | + | |
1688 | + /* | |
1689 | + * Free stripes pool | |
1690 | + */ | |
1691 | + int nr_free_sh; | |
1692 | + struct stripe_head *free_sh_list; | |
1693 | + struct wait_queue *wait_for_stripe; | |
1694 | +}; | |
1695 | + | |
1696 | +typedef struct raid5_private_data raid5_conf_t; | |
1697 | + | |
1698 | +#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private) | |
1699 | + | |
1700 | +/* | |
1701 | + * Our supported algorithms | |
1702 | + */ | |
1703 | +#define ALGORITHM_LEFT_ASYMMETRIC 0 | |
1704 | +#define ALGORITHM_RIGHT_ASYMMETRIC 1 | |
1705 | +#define ALGORITHM_LEFT_SYMMETRIC 2 | |
1706 | +#define ALGORITHM_RIGHT_SYMMETRIC 3 | |
1707 | + | |
1708 | +#endif | |
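The four ALGORITHM_* values select where the parity block of each stripe lives. As a rough sketch of the conventional layouts (the driver's own sector-mapping code is not shown here, so treat this as an assumption about the usual semantics, not a copy of it): the 'left' layouts rotate parity from the last disk towards the first, the 'right' layouts from the first towards the last, and the symmetric variants differ only in where the data blocks of a stripe start, not in parity placement.

        /* Sketch: conventional parity-disk choice for the four layouts. */
        static int parity_disk_sketch(int algorithm, unsigned long stripe, int raid_disks)
        {
                switch (algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                case ALGORITHM_LEFT_SYMMETRIC:
                        return raid_disks - 1 - (int)(stripe % raid_disks);
                case ALGORITHM_RIGHT_ASYMMETRIC:
                case ALGORITHM_RIGHT_SYMMETRIC:
                        return (int)(stripe % raid_disks);
                default:
                        return -1;      /* unknown layout */
                }
        }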
1709 | --- linux/include/linux/raid/translucent.h.orig Fri Nov 23 11:18:16 2001 | |
1710 | +++ linux/include/linux/raid/translucent.h Fri Nov 23 11:18:16 2001 | |
1711 | @@ -0,0 +1,23 @@ | |
1712 | +#ifndef _TRANSLUCENT_H | |
1713 | +#define _TRANSLUCENT_H | |
1714 | + | |
1715 | +#include <linux/raid/md.h> | |
1716 | + | |
1717 | +typedef struct dev_info dev_info_t; | |
1718 | + | |
1719 | +struct dev_info { | |
1720 | + kdev_t dev; | |
1721 | + int size; | |
1722 | +}; | |
1723 | + | |
1724 | +struct translucent_private_data | |
1725 | +{ | |
1726 | + dev_info_t disks[MD_SB_DISKS]; | |
1727 | +}; | |
1728 | + | |
1729 | + | |
1730 | +typedef struct translucent_private_data translucent_conf_t; | |
1731 | + | |
1732 | +#define mddev_to_conf(mddev) ((translucent_conf_t *) mddev->private) | |
1733 | + | |
1734 | +#endif | |
1735 | --- linux/include/linux/raid/xor.h.orig Fri Nov 23 11:18:16 2001 | |
1736 | +++ linux/include/linux/raid/xor.h Fri Nov 23 11:18:16 2001 | |
1737 | @@ -0,0 +1,12 @@ | |
1738 | +#ifndef _XOR_H | |
1739 | +#define _XOR_H | |
1740 | + | |
1741 | +#include <linux/raid/md.h> | |
1742 | + | |
1743 | +#define MAX_XOR_BLOCKS 5 | |
1744 | + | |
1745 | +extern void calibrate_xor_block(void); | |
1746 | +extern void (*xor_block)(unsigned int count, | |
1747 | + struct buffer_head **bh_ptr); | |
1748 | + | |
1749 | +#endif | |
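xor_block is a function pointer so that calibrate_xor_block() can point it at the fastest routine for the machine at boot time. The semantics every candidate routine implements are simply "XOR buffers 1..count-1 into buffer 0"; a portable reference version (illustrative only, not one of the optimized routines) would look like:

        /* Reference semantics of xor_block(): fold bh_ptr[1..count-1] into bh_ptr[0]. */
        static void xor_block_reference(unsigned int count, struct buffer_head **bh_ptr)
        {
                unsigned long *dest = (unsigned long *) bh_ptr[0]->b_data;
                unsigned long words = bh_ptr[0]->b_size / sizeof(unsigned long);
                unsigned int i;
                unsigned long w;

                for (i = 1; i < count; i++) {
                        unsigned long *src = (unsigned long *) bh_ptr[i]->b_data;
                        for (w = 0; w < words; w++)
                                dest[w] ^= src[w];
                }
        }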
1750 | --- linux/include/linux/fs.h.orig Fri Nov 23 11:17:45 2001 | |
1751 | +++ linux/include/linux/fs.h Fri Nov 23 11:18:16 2001 | |
1752 | @@ -191,6 +191,7 @@ | |
1753 | #define BH_Req 3 /* 0 if the buffer has been invalidated */ | |
1754 | #define BH_Protected 6 /* 1 if the buffer is protected */ | |
1755 | #define BH_Wait_IO 7 /* 1 if we should throttle on this buffer */ | |
1756 | +#define BH_LowPrio 8 /* 1 if the buffer is lowprio */ | |
1757 | ||
1758 | /* | |
1759 | * Try to keep the most commonly used fields in single cache lines (16 | |
1760 | @@ -784,6 +785,7 @@ | |
1761 | extern void refile_buffer(struct buffer_head * buf); | |
1762 | extern void set_writetime(struct buffer_head * buf, int flag); | |
1763 | extern int try_to_free_buffers(struct page *, int); | |
1764 | +extern void cache_drop_behind(struct buffer_head *bh); | |
1765 | ||
1766 | extern int nr_buffers; | |
1767 | extern long buffermem; | |
1768 | @@ -814,6 +816,25 @@ | |
1769 | } | |
1770 | } | |
1771 | ||
1772 | +extern inline void mark_buffer_highprio(struct buffer_head * bh) | |
1773 | +{ | |
1774 | + clear_bit(BH_LowPrio, &bh->b_state); | |
1775 | +} | |
1776 | + | |
1777 | +extern inline void mark_buffer_lowprio(struct buffer_head * bh) | |
1778 | +{ | |
1779 | + /* | |
1780 | + * dirty buffers cannot be marked lowprio. | |
1781 | + */ | |
1782 | + if (!buffer_dirty(bh)) | |
1783 | + set_bit(BH_LowPrio, &bh->b_state); | |
1784 | +} | |
1785 | + | |
1786 | +static inline int buffer_lowprio(struct buffer_head * bh) | |
1787 | +{ | |
1788 | + return test_bit(BH_LowPrio, &bh->b_state); | |
1789 | +} | |
1790 | + | |
1791 | extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) | |
1792 | { | |
1793 | if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { | |
1794 | @@ -821,6 +842,23 @@ | |
1795 | if (bh->b_list != BUF_DIRTY) | |
1796 | refile_buffer(bh); | |
1797 | } | |
1798 | + /* | |
1799 | + * if a buffer gets marked dirty then it has to lose | |
1800 | + * its lowprio state. | 
1801 | + */ | |
1802 | + mark_buffer_highprio(bh); | |
1803 | +} | |
1804 | + | |
1805 | +extern inline void mark_buffer_dirty_lowprio(struct buffer_head * bh) | |
1806 | +{ | |
1807 | + if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { | |
1808 | + if (bh->b_list != BUF_DIRTY) | |
1809 | + refile_buffer(bh); | |
1810 | + /* | |
1811 | + * Mark it lowprio only if it was not dirty before! | |
1812 | + */ | |
1813 | + set_bit(BH_LowPrio, &bh->b_state); | |
1814 | + } | |
1815 | } | |
1816 | ||
1817 | extern int check_disk_change(kdev_t dev); | |
1818 | @@ -899,6 +937,7 @@ | |
1819 | extern struct buffer_head * find_buffer(kdev_t dev, int block, int size); | |
1820 | extern void ll_rw_block(int, int, struct buffer_head * bh[]); | |
1821 | extern int is_read_only(kdev_t); | |
1822 | +extern int is_device_idle(kdev_t); | |
1823 | extern void __brelse(struct buffer_head *); | |
1824 | extern inline void brelse(struct buffer_head *buf) | |
1825 | { | |
1826 | @@ -914,8 +953,12 @@ | |
1827 | extern void set_blocksize(kdev_t dev, int size); | |
1828 | extern unsigned int get_hardblocksize(kdev_t dev); | |
1829 | extern struct buffer_head * bread(kdev_t dev, int block, int size); | |
1830 | +extern struct buffer_head * buffer_ready (kdev_t dev, int block, int size); | |
1831 | +extern void bread_ahead (kdev_t dev, int block, int size); | |
1832 | extern struct buffer_head * breada(kdev_t dev,int block, int size, | |
1833 | unsigned int pos, unsigned int filesize); | |
1834 | +extern struct buffer_head * breada_blocks(kdev_t dev,int block, | |
1835 | + int size, int blocks); | |
1836 | ||
1837 | extern int brw_page(int, struct page *, kdev_t, int [], int, int); | |
1838 | ||
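The BH_LowPrio bit and the helpers added above let bulk I/O (such as RAID reconstruction) mark its buffers as second-class citizens of the buffer cache. A minimal sketch of the intended call pattern, assuming a hypothetical resync_one_block_sketch() caller (the real resync loop lives in the md code and is not reproduced here):

        /* Sketch: read ahead, dirty the rebuilt block as low priority,
         * and hint the cache to drop it behind us. */
        static void resync_one_block_sketch(kdev_t dev, int blocknr, int blocksize)
        {
                struct buffer_head *bh;

                bread_ahead(dev, blocknr + 1, blocksize);       /* overlap the next read */

                bh = bread(dev, blocknr, blocksize);
                if (!bh)
                        return;

                /* ... recompute or copy the reconstructed data into bh->b_data ... */

                mark_buffer_dirty_lowprio(bh);  /* dirty, but flagged BH_LowPrio */
                cache_drop_behind(bh);          /* hint: do not keep it cached */
                brelse(bh);
        }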
1839 | --- linux/include/linux/blkdev.h.orig Mon Dec 11 01:49:44 2000 | |
1840 | +++ linux/include/linux/blkdev.h Fri Nov 23 11:18:16 2001 | |
1841 | @@ -91,8 +91,9 @@ | |
1842 | extern void make_request(int major,int rw, struct buffer_head * bh); | |
1843 | ||
1844 | /* md needs this function to remap requests */ | |
1845 | -extern int md_map (int minor, kdev_t *rdev, unsigned long *rsector, unsigned long size); | |
1846 | -extern int md_make_request (int minor, int rw, struct buffer_head * bh); | |
1847 | +extern int md_map (kdev_t dev, kdev_t *rdev, | |
1848 | + unsigned long *rsector, unsigned long size); | |
1849 | +extern int md_make_request (struct buffer_head * bh, int rw); | |
1850 | extern int md_error (kdev_t mddev, kdev_t rdev); | |
1851 | ||
1852 | extern int * blk_size[MAX_BLKDEV]; | |
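The md_map()/md_make_request() prototypes change from minor numbers to kdev_t and buffer_head arguments, so callers no longer need to know MD's minor layout. A hedged sketch of the remap step a caller would perform with the new signature (the helper name is an assumption; the real call site is in the block layer and is not reproduced here):

        /* Sketch: let the MD layer rewrite b_rdev/b_rsector for a request
         * aimed at an MD device. */
        static int remap_md_request_sketch(struct buffer_head *bh, unsigned long size)
        {
                if (MAJOR(bh->b_rdev) != MD_MAJOR)
                        return 0;               /* not an MD device, nothing to remap */

                return md_map(bh->b_rdev, &bh->b_rdev, &bh->b_rsector, size);
        }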
1853 | --- linux/include/linux/md.h.orig Fri May 8 09:17:13 1998 | |
1854 | +++ linux/include/linux/md.h Fri Nov 23 11:18:18 2001 | |
1855 | @@ -1,300 +0,0 @@ | |
1856 | -/* | |
1857 | - md.h : Multiple Devices driver for Linux | |
1858 | - Copyright (C) 1994-96 Marc ZYNGIER | |
1859 | - <zyngier@ufr-info-p7.ibp.fr> or | |
1860 | - <maz@gloups.fdn.fr> | |
1861 | - | |
1862 | - This program is free software; you can redistribute it and/or modify | |
1863 | - it under the terms of the GNU General Public License as published by | |
1864 | - the Free Software Foundation; either version 2, or (at your option) | |
1865 | - any later version. | |
1866 | - | |
1867 | - You should have received a copy of the GNU General Public License | |
1868 | - (for example /usr/src/linux/COPYING); if not, write to the Free | |
1869 | - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
1870 | -*/ | |
1871 | - | |
1872 | -#ifndef _MD_H | |
1873 | -#define _MD_H | |
1874 | - | |
1875 | -#include <linux/major.h> | |
1876 | -#include <linux/ioctl.h> | |
1877 | -#include <linux/types.h> | |
1878 | - | |
1879 | -/* | |
1880 | - * Different major versions are not compatible. | |
1881 | - * Different minor versions are only downward compatible. | |
1882 | - * Different patchlevel versions are downward and upward compatible. | |
1883 | - */ | |
1884 | -#define MD_MAJOR_VERSION 0 | |
1885 | -#define MD_MINOR_VERSION 36 | |
1886 | -#define MD_PATCHLEVEL_VERSION 6 | |
1887 | - | |
1888 | -#define MD_DEFAULT_DISK_READAHEAD (256 * 1024) | |
1889 | - | |
1890 | -/* ioctls */ | |
1891 | -#define REGISTER_DEV _IO (MD_MAJOR, 1) | |
1892 | -#define START_MD _IO (MD_MAJOR, 2) | |
1893 | -#define STOP_MD _IO (MD_MAJOR, 3) | |
1894 | -#define REGISTER_DEV_NEW _IO (MD_MAJOR, 4) | |
1895 | - | |
1896 | -/* | |
1897 | - personalities : | |
1898 | - Byte 0 : Chunk size factor | |
1899 | - Byte 1 : Fault tolerance count for each physical device | |
1900 | - ( 0 means no fault tolerance, | |
1901 | - 0xFF means always tolerate faults), not used by now. | |
1902 | - Byte 2 : Personality | |
1903 | - Byte 3 : Reserved. | |
1904 | - */ | |
1905 | - | |
1906 | -#define FAULT_SHIFT 8 | |
1907 | -#define PERSONALITY_SHIFT 16 | |
1908 | - | |
1909 | -#define FACTOR_MASK 0x000000FFUL | |
1910 | -#define FAULT_MASK 0x0000FF00UL | |
1911 | -#define PERSONALITY_MASK 0x00FF0000UL | |
1912 | - | |
1913 | -#define MD_RESERVED 0 /* Not used by now */ | |
1914 | -#define LINEAR (1UL << PERSONALITY_SHIFT) | |
1915 | -#define STRIPED (2UL << PERSONALITY_SHIFT) | |
1916 | -#define RAID0 STRIPED | |
1917 | -#define RAID1 (3UL << PERSONALITY_SHIFT) | |
1918 | -#define RAID5 (4UL << PERSONALITY_SHIFT) | |
1919 | -#define MAX_PERSONALITY 5 | |
1920 | - | |
1921 | -/* | |
1922 | - * MD superblock. | |
1923 | - * | |
1924 | - * The MD superblock maintains some statistics on each MD configuration. | |
1925 | - * Each real device in the MD set contains it near the end of the device. | |
1926 | - * Some of the ideas are copied from the ext2fs implementation. | |
1927 | - * | |
1928 | - * We currently use 4096 bytes as follows: | |
1929 | - * | |
1930 | - * word offset function | |
1931 | - * | |
1932 | - * 0 - 31 Constant generic MD device information. | |
1933 | - * 32 - 63 Generic state information. | |
1934 | - * 64 - 127 Personality specific information. | |
1935 | - * 128 - 511 12 32-words descriptors of the disks in the raid set. | |
1936 | - * 512 - 911 Reserved. | |
1937 | - * 912 - 1023 Disk specific descriptor. | |
1938 | - */ | |
1939 | - | |
1940 | -/* | |
1941 | - * If x is the real device size in bytes, we return an apparent size of: | |
1942 | - * | |
1943 | - * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES | |
1944 | - * | |
1945 | - * and place the 4kB superblock at offset y. | |
1946 | - */ | |
1947 | -#define MD_RESERVED_BYTES (64 * 1024) | |
1948 | -#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) | |
1949 | -#define MD_RESERVED_BLOCKS (MD_RESERVED_BYTES / BLOCK_SIZE) | |
1950 | - | |
1951 | -#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) | |
1952 | -#define MD_NEW_SIZE_BLOCKS(x) ((x & ~(MD_RESERVED_BLOCKS - 1)) - MD_RESERVED_BLOCKS) | |
1953 | - | |
1954 | -#define MD_SB_BYTES 4096 | |
1955 | -#define MD_SB_WORDS (MD_SB_BYTES / 4) | |
1956 | -#define MD_SB_BLOCKS (MD_SB_BYTES / BLOCK_SIZE) | |
1957 | -#define MD_SB_SECTORS (MD_SB_BYTES / 512) | |
1958 | - | |
1959 | -/* | |
1960 | - * The following are counted in 32-bit words | |
1961 | - */ | |
1962 | -#define MD_SB_GENERIC_OFFSET 0 | |
1963 | -#define MD_SB_PERSONALITY_OFFSET 64 | |
1964 | -#define MD_SB_DISKS_OFFSET 128 | |
1965 | -#define MD_SB_DESCRIPTOR_OFFSET 992 | |
1966 | - | |
1967 | -#define MD_SB_GENERIC_CONSTANT_WORDS 32 | |
1968 | -#define MD_SB_GENERIC_STATE_WORDS 32 | |
1969 | -#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) | |
1970 | -#define MD_SB_PERSONALITY_WORDS 64 | |
1971 | -#define MD_SB_DISKS_WORDS 384 | |
1972 | -#define MD_SB_DESCRIPTOR_WORDS 32 | |
1973 | -#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) | |
1974 | -#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) | |
1975 | -#define MD_SB_DISKS (MD_SB_DISKS_WORDS / MD_SB_DESCRIPTOR_WORDS) | |
1976 | - | |
1977 | -/* | |
1978 | - * Device "operational" state bits | |
1979 | - */ | |
1980 | -#define MD_FAULTY_DEVICE 0 /* Device is faulty / operational */ | |
1981 | -#define MD_ACTIVE_DEVICE 1 /* Device is a part or the raid set / spare disk */ | |
1982 | -#define MD_SYNC_DEVICE 2 /* Device is in sync with the raid set */ | |
1983 | - | |
1984 | -typedef struct md_device_descriptor_s { | |
1985 | - __u32 number; /* 0 Device number in the entire set */ | |
1986 | - __u32 major; /* 1 Device major number */ | |
1987 | - __u32 minor; /* 2 Device minor number */ | |
1988 | - __u32 raid_disk; /* 3 The role of the device in the raid set */ | |
1989 | - __u32 state; /* 4 Operational state */ | |
1990 | - __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; | |
1991 | -} md_descriptor_t; | |
1992 | - | |
1993 | -#define MD_SB_MAGIC 0xa92b4efc | |
1994 | - | |
1995 | -/* | |
1996 | - * Superblock state bits | |
1997 | - */ | |
1998 | -#define MD_SB_CLEAN 0 | |
1999 | -#define MD_SB_ERRORS 1 | |
2000 | - | |
2001 | -typedef struct md_superblock_s { | |
2002 | - | |
2003 | - /* | |
2004 | - * Constant generic information | |
2005 | - */ | |
2006 | - __u32 md_magic; /* 0 MD identifier */ | |
2007 | - __u32 major_version; /* 1 major version to which the set conforms */ | |
2008 | - __u32 minor_version; /* 2 minor version to which the set conforms */ | |
2009 | - __u32 patch_version; /* 3 patchlevel version to which the set conforms */ | |
2010 | - __u32 gvalid_words; /* 4 Number of non-reserved words in this section */ | |
2011 | - __u32 set_magic; /* 5 Raid set identifier */ | |
2012 | - __u32 ctime; /* 6 Creation time */ | |
2013 | - __u32 level; /* 7 Raid personality (mirroring, raid5, ...) */ | |
2014 | - __u32 size; /* 8 Apparent size of each individual disk, in kB */ | |
2015 | - __u32 nr_disks; /* 9 Number of total disks in the raid set */ | |
2016 | - __u32 raid_disks; /* 10 Number of disks in a fully functional raid set */ | |
2017 | - __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 11]; | |
2018 | - | |
2019 | - /* | |
2020 | - * Generic state information | |
2021 | - */ | |
2022 | - __u32 utime; /* 0 Superblock update time */ | |
2023 | - __u32 state; /* 1 State bits (clean, ...) */ | |
2024 | - __u32 active_disks; /* 2 Number of currently active disks (some non-faulty disks might not be in sync) */ | |
2025 | - __u32 working_disks; /* 3 Number of working disks */ | |
2026 | - __u32 failed_disks; /* 4 Number of failed disks */ | |
2027 | - __u32 spare_disks; /* 5 Number of spare disks */ | |
2028 | - __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 6]; | |
2029 | - | |
2030 | - /* | |
2031 | - * Personality information | |
2032 | - */ | |
2033 | - __u32 parity_algorithm; | |
2034 | - __u32 chunk_size; | |
2035 | - __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 2]; | |
2036 | - | |
2037 | - /* | |
2038 | - * Disks information | |
2039 | - */ | |
2040 | - md_descriptor_t disks[MD_SB_DISKS]; | |
2041 | - | |
2042 | - /* | |
2043 | - * Reserved | |
2044 | - */ | |
2045 | - __u32 reserved[MD_SB_RESERVED_WORDS]; | |
2046 | - | |
2047 | - /* | |
2048 | - * Active descriptor | |
2049 | - */ | |
2050 | - md_descriptor_t descriptor; | |
2051 | -} md_superblock_t; | |
2052 | - | |
2053 | -#ifdef __KERNEL__ | |
2054 | - | |
2055 | -#include <linux/mm.h> | |
2056 | -#include <linux/fs.h> | |
2057 | -#include <linux/blkdev.h> | |
2058 | -#include <asm/semaphore.h> | |
2059 | - | |
2060 | -/* | |
2061 | - * Kernel-based reconstruction is mostly working, but still requires | |
2062 | - * some additional work. | |
2063 | - */ | |
2064 | -#define SUPPORT_RECONSTRUCTION 0 | |
2065 | - | |
2066 | -#define MAX_REAL 8 /* Max number of physical dev per md dev */ | |
2067 | -#define MAX_MD_DEV 4 /* Max number of md dev */ | |
2068 | - | |
2069 | -#define FACTOR(a) ((a)->repartition & FACTOR_MASK) | |
2070 | -#define MAX_FAULT(a) (((a)->repartition & FAULT_MASK)>>8) | |
2071 | -#define PERSONALITY(a) ((a)->repartition & PERSONALITY_MASK) | |
2072 | - | |
2073 | -#define FACTOR_SHIFT(a) (PAGE_SHIFT + (a) - 10) | |
2074 | - | |
2075 | -struct real_dev | |
2076 | -{ | |
2077 | - kdev_t dev; /* Device number */ | |
2078 | - int size; /* Device size (in blocks) */ | |
2079 | - int offset; /* Real device offset (in blocks) in md dev | |
2080 | - (only used in linear mode) */ | |
2081 | - struct inode *inode; /* Lock inode */ | |
2082 | - md_superblock_t *sb; | |
2083 | - u32 sb_offset; | |
2084 | -}; | |
2085 | - | |
2086 | -struct md_dev; | |
2087 | - | |
2088 | -#define SPARE_INACTIVE 0 | |
2089 | -#define SPARE_WRITE 1 | |
2090 | -#define SPARE_ACTIVE 2 | |
2091 | - | |
2092 | -struct md_personality | |
2093 | -{ | |
2094 | - char *name; | |
2095 | - int (*map)(struct md_dev *mddev, kdev_t *rdev, | |
2096 | - unsigned long *rsector, unsigned long size); | |
2097 | - int (*make_request)(struct md_dev *mddev, int rw, struct buffer_head * bh); | |
2098 | - void (*end_request)(struct buffer_head * bh, int uptodate); | |
2099 | - int (*run)(int minor, struct md_dev *mddev); | |
2100 | - int (*stop)(int minor, struct md_dev *mddev); | |
2101 | - int (*status)(char *page, int minor, struct md_dev *mddev); | |
2102 | - int (*ioctl)(struct inode *inode, struct file *file, | |
2103 | - unsigned int cmd, unsigned long arg); | |
2104 | - int max_invalid_dev; | |
2105 | - int (*error_handler)(struct md_dev *mddev, kdev_t dev); | |
2106 | - | |
2107 | -/* | |
2108 | - * Some personalities (RAID-1, RAID-5) can get disks hot-added and | |
2109 | - * hot-removed. Hot removal is different from failure. (failure marks | |
2110 | - * a disk inactive, but the disk is still part of the array) | |
2111 | - */ | |
2112 | - int (*hot_add_disk) (struct md_dev *mddev, kdev_t dev); | |
2113 | - int (*hot_remove_disk) (struct md_dev *mddev, kdev_t dev); | |
2114 | - int (*mark_spare) (struct md_dev *mddev, md_descriptor_t *descriptor, int state); | |
2115 | -}; | |
2116 | - | |
2117 | -struct md_dev | |
2118 | -{ | |
2119 | - struct real_dev devices[MAX_REAL]; | |
2120 | - struct md_personality *pers; | |
2121 | - md_superblock_t *sb; | |
2122 | - int sb_dirty; | |
2123 | - int repartition; | |
2124 | - int busy; | |
2125 | - int nb_dev; | |
2126 | - void *private; | |
2127 | -}; | |
2128 | - | |
2129 | -struct md_thread { | |
2130 | - void (*run) (void *data); | |
2131 | - void *data; | |
2132 | - struct wait_queue *wqueue; | |
2133 | - unsigned long flags; | |
2134 | - struct semaphore *sem; | |
2135 | - struct task_struct *tsk; | |
2136 | -}; | |
2137 | - | |
2138 | -#define THREAD_WAKEUP 0 | |
2139 | - | |
2140 | -extern struct md_dev md_dev[MAX_MD_DEV]; | |
2141 | -extern int md_size[MAX_MD_DEV]; | |
2142 | -extern int md_maxreadahead[MAX_MD_DEV]; | |
2143 | - | |
2144 | -extern char *partition_name (kdev_t dev); | |
2145 | - | |
2146 | -extern int register_md_personality (int p_num, struct md_personality *p); | |
2147 | -extern int unregister_md_personality (int p_num); | |
2148 | -extern struct md_thread *md_register_thread (void (*run) (void *data), void *data); | |
2149 | -extern void md_unregister_thread (struct md_thread *thread); | |
2150 | -extern void md_wakeup_thread(struct md_thread *thread); | |
2151 | -extern int md_update_sb (int minor); | |
2152 | -extern int md_do_sync(struct md_dev *mddev); | |
2153 | - | |
2154 | -#endif __KERNEL__ | |
2155 | -#endif _MD_H | |
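For reference, the superblock placement rule defined in this deleted header (and carried forward by the new raid headers) rounds the member size down to a 64 KB boundary and steals the last reserved chunk. Worked example for a member device of 10,000,100 sectors (an arbitrary size chosen for illustration):

        MD_RESERVED_SECTORS            = 65536 / 512 = 128
        MD_NEW_SIZE_SECTORS(10000100)  = (10000100 & ~127) - 128
                                       = 10000000 - 128 = 9999872

so the array sees 9,999,872 sectors, the 8-sector superblock (MD_SB_SECTORS) is written starting at sector 9,999,872, and the rest of the 64 KB reserved area stays unused.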
2156 | --- linux/include/linux/raid0.h.orig Tue Oct 29 14:20:24 1996 | |
2157 | +++ linux/include/linux/raid0.h Fri Nov 23 11:18:18 2001 | |
2158 | @@ -1,27 +0,0 @@ | |
2159 | -#ifndef _RAID0_H | |
2160 | -#define _RAID0_H | |
2161 | - | |
2162 | -struct strip_zone | |
2163 | -{ | |
2164 | - int zone_offset; /* Zone offset in md_dev */ | |
2165 | - int dev_offset; /* Zone offset in real dev */ | |
2166 | - int size; /* Zone size */ | |
2167 | - int nb_dev; /* Number of devices attached to the zone */ | |
2168 | - struct real_dev *dev[MAX_REAL]; /* Devices attached to the zone */ | |
2169 | -}; | |
2170 | - | |
2171 | -struct raid0_hash | |
2172 | -{ | |
2173 | - struct strip_zone *zone0, *zone1; | |
2174 | -}; | |
2175 | - | |
2176 | -struct raid0_data | |
2177 | -{ | |
2178 | - struct raid0_hash *hash_table; /* Dynamically allocated */ | |
2179 | - struct strip_zone *strip_zone; /* This one too */ | |
2180 | - int nr_strip_zones; | |
2181 | - struct strip_zone *smallest; | |
2182 | - int nr_zones; | |
2183 | -}; | |
2184 | - | |
2185 | -#endif | |
2186 | --- linux/include/linux/raid1.h.orig Fri May 8 09:17:13 1998 | |
2187 | +++ linux/include/linux/raid1.h Fri Nov 23 11:18:18 2001 | |
2188 | @@ -1,49 +0,0 @@ | |
2189 | -#ifndef _RAID1_H | |
2190 | -#define _RAID1_H | |
2191 | - | |
2192 | -#include <linux/md.h> | |
2193 | - | |
2194 | -struct mirror_info { | |
2195 | - int number; | |
2196 | - int raid_disk; | |
2197 | - kdev_t dev; | |
2198 | - int next; | |
2199 | - int sect_limit; | |
2200 | - | |
2201 | - /* | |
2202 | - * State bits: | |
2203 | - */ | |
2204 | - int operational; | |
2205 | - int write_only; | |
2206 | - int spare; | |
2207 | -}; | |
2208 | - | |
2209 | -struct raid1_data { | |
2210 | - struct md_dev *mddev; | |
2211 | - struct mirror_info mirrors[MD_SB_DISKS]; /* RAID1 devices, 2 to MD_SB_DISKS */ | |
2212 | - int raid_disks; | |
2213 | - int working_disks; /* Number of working disks */ | |
2214 | - int last_used; | |
2215 | - unsigned long next_sect; | |
2216 | - int sect_count; | |
2217 | - int resync_running; | |
2218 | -}; | |
2219 | - | |
2220 | -/* | |
2221 | - * this is our 'private' 'collective' RAID1 buffer head. | |
2222 | - * it contains information about what kind of IO operations were started | |
2223 | - * for this RAID5 operation, and about their status: | |
2224 | - */ | |
2225 | - | |
2226 | -struct raid1_bh { | |
2227 | - unsigned int remaining; | |
2228 | - int cmd; | |
2229 | - unsigned long state; | |
2230 | - struct md_dev *mddev; | |
2231 | - struct buffer_head *master_bh; | |
2232 | - struct buffer_head *mirror_bh [MD_SB_DISKS]; | |
2233 | - struct buffer_head bh_req; | |
2234 | - struct buffer_head *next_retry; | |
2235 | -}; | |
2236 | - | |
2237 | -#endif | |
2238 | --- linux/include/linux/raid5.h.orig Fri May 8 09:17:13 1998 | |
2239 | +++ linux/include/linux/raid5.h Fri Nov 23 11:18:18 2001 | |
2240 | @@ -1,110 +0,0 @@ | |
2241 | -#ifndef _RAID5_H | |
2242 | -#define _RAID5_H | |
2243 | - | |
2244 | -#ifdef __KERNEL__ | |
2245 | -#include <linux/md.h> | |
2246 | -#include <asm/atomic.h> | |
2247 | - | |
2248 | -struct disk_info { | |
2249 | - kdev_t dev; | |
2250 | - int operational; | |
2251 | - int number; | |
2252 | - int raid_disk; | |
2253 | - int write_only; | |
2254 | - int spare; | |
2255 | -}; | |
2256 | - | |
2257 | -struct stripe_head { | |
2258 | - struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ | |
2259 | - struct stripe_head *free_next; /* pool of free sh's */ | |
2260 | - struct buffer_head *buffer_pool; /* pool of free buffers */ | |
2261 | - struct buffer_head *bh_pool; /* pool of free bh's */ | |
2262 | - struct raid5_data *raid_conf; | |
2263 | - struct buffer_head *bh_old[MD_SB_DISKS]; /* disk image */ | |
2264 | - struct buffer_head *bh_new[MD_SB_DISKS]; /* buffers of the MD device (present in buffer cache) */ | |
2265 | - struct buffer_head *bh_copy[MD_SB_DISKS]; /* copy on write of bh_new (bh_new can change from under us) */ | |
2266 | - struct buffer_head *bh_req[MD_SB_DISKS]; /* copy of bh_new (only the buffer heads), queued to the lower levels */ | |
2267 | - int cmd_new[MD_SB_DISKS]; /* READ/WRITE for new */ | |
2268 | - int new[MD_SB_DISKS]; /* buffer added since the last handle_stripe() */ | |
2269 | - unsigned long sector; /* sector of this row */ | |
2270 | - int size; /* buffers size */ | |
2271 | - int pd_idx; /* parity disk index */ | |
2272 | - int nr_pending; /* nr of pending cmds */ | |
2273 | - unsigned long state; /* state flags */ | |
2274 | - int cmd; /* stripe cmd */ | |
2275 | - int count; /* nr of waiters */ | |
2276 | - int write_method; /* reconstruct-write / read-modify-write */ | |
2277 | - int phase; /* PHASE_BEGIN, ..., PHASE_COMPLETE */ | |
2278 | - struct wait_queue *wait; /* processes waiting for this stripe */ | |
2279 | -}; | |
2280 | - | |
2281 | -/* | |
2282 | - * Phase | |
2283 | - */ | |
2284 | -#define PHASE_BEGIN 0 | |
2285 | -#define PHASE_READ_OLD 1 | |
2286 | -#define PHASE_WRITE 2 | |
2287 | -#define PHASE_READ 3 | |
2288 | -#define PHASE_COMPLETE 4 | |
2289 | - | |
2290 | -/* | |
2291 | - * Write method | |
2292 | - */ | |
2293 | -#define METHOD_NONE 0 | |
2294 | -#define RECONSTRUCT_WRITE 1 | |
2295 | -#define READ_MODIFY_WRITE 2 | |
2296 | - | |
2297 | -/* | |
2298 | - * Stripe state | |
2299 | - */ | |
2300 | -#define STRIPE_LOCKED 0 | |
2301 | -#define STRIPE_ERROR 1 | |
2302 | - | |
2303 | -/* | |
2304 | - * Stripe commands | |
2305 | - */ | |
2306 | -#define STRIPE_NONE 0 | |
2307 | -#define STRIPE_WRITE 1 | |
2308 | -#define STRIPE_READ 2 | |
2309 | - | |
2310 | -struct raid5_data { | |
2311 | - struct stripe_head **stripe_hashtbl; | |
2312 | - struct md_dev *mddev; | |
2313 | - struct md_thread *thread, *resync_thread; | |
2314 | - struct disk_info disks[MD_SB_DISKS]; | |
2315 | - struct disk_info *spare; | |
2316 | - int buffer_size; | |
2317 | - int chunk_size, level, algorithm; | |
2318 | - int raid_disks, working_disks, failed_disks; | |
2319 | - int sector_count; | |
2320 | - unsigned long next_sector; | |
2321 | - atomic_t nr_handle; | |
2322 | - struct stripe_head *next_free_stripe; | |
2323 | - int nr_stripes; | |
2324 | - int resync_parity; | |
2325 | - int max_nr_stripes; | |
2326 | - int clock; | |
2327 | - int nr_hashed_stripes; | |
2328 | - int nr_locked_stripes; | |
2329 | - int nr_pending_stripes; | |
2330 | - int nr_cached_stripes; | |
2331 | - | |
2332 | - /* | |
2333 | - * Free stripes pool | |
2334 | - */ | |
2335 | - int nr_free_sh; | |
2336 | - struct stripe_head *free_sh_list; | |
2337 | - struct wait_queue *wait_for_stripe; | |
2338 | -}; | |
2339 | - | |
2340 | -#endif | |
2341 | - | |
2342 | -/* | |
2343 | - * Our supported algorithms | |
2344 | - */ | |
2345 | -#define ALGORITHM_LEFT_ASYMMETRIC 0 | |
2346 | -#define ALGORITHM_RIGHT_ASYMMETRIC 1 | |
2347 | -#define ALGORITHM_LEFT_SYMMETRIC 2 | |
2348 | -#define ALGORITHM_RIGHT_SYMMETRIC 3 | |
2349 | - | |
2350 | -#endif | |
2351 | --- linux/include/linux/sysctl.h.orig Fri Nov 23 11:17:44 2001 | |
2352 | +++ linux/include/linux/sysctl.h Fri Nov 23 11:18:18 2001 | |
2353 | @@ -435,6 +435,7 @@ | |
2354 | enum { | |
2355 | DEV_CDROM=1, | |
2356 | DEV_HWMON=2, | |
2357 | + DEV_MD=3, | |
2358 | DEV_MAC_HID=5 | |
2359 | }; | |
2360 | ||
2361 | @@ -446,6 +447,11 @@ | |
2362 | DEV_CDROM_DEBUG=4, | |
2363 | DEV_CDROM_LOCK=5, | |
2364 | DEV_CDROM_CHECK_MEDIA=6 | |
2365 | +}; | |
2366 | + | |
2367 | +/* /proc/sys/dev/md */ | |
2368 | +enum { | |
2369 | + DEV_MD_SPEED_LIMIT=1 | |
2370 | }; | |
2371 | ||
2372 | /* /proc/sys/dev/mac_hid */ | |
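DEV_MD and DEV_MD_SPEED_LIMIT reserve a /proc/sys/dev/md directory with a single tunable. A sketch of how such a table would be wired into the 2.2-era sysctl API; the variable name, the "speed-limit" procname and the permissions here are assumptions for the example, not taken from the patch:

        static int sysctl_speed_limit = 100;    /* hypothetical tunable */

        static ctl_table md_table[] = {
                {DEV_MD_SPEED_LIMIT, "speed-limit", &sysctl_speed_limit,
                 sizeof(int), 0644, NULL, &proc_dointvec},
                {0}
        };
        static ctl_table md_dir_table[] = {
                {DEV_MD, "md", NULL, 0, 0555, md_table},
                {0}
        };
        static ctl_table md_root_table[] = {
                {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table},
                {0}
        };

        /* registered once at init time: register_sysctl_table(md_root_table, 1); */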
2373 | --- linux/include/asm-i386/md.h.orig Fri May 8 09:17:13 1998 | |
2374 | +++ linux/include/asm-i386/md.h Fri Nov 23 11:18:18 2001 | |
2375 | @@ -1,13 +0,0 @@ | |
2376 | -/* $Id$ | |
2377 | - * md.h: High speed xor_block operation for RAID4/5 | |
2378 | - * | |
2379 | - */ | |
2380 | - | |
2381 | -#ifndef __ASM_MD_H | |
2382 | -#define __ASM_MD_H | |
2383 | - | |
2384 | -/* #define HAVE_ARCH_XORBLOCK */ | |
2385 | - | |
2386 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
2387 | - | |
2388 | -#endif /* __ASM_MD_H */ | |
2389 | --- linux/include/asm-alpha/md.h.orig Fri May 8 09:17:13 1998 | |
2390 | +++ linux/include/asm-alpha/md.h Fri Nov 23 11:18:18 2001 | |
2391 | @@ -1,13 +0,0 @@ | |
2392 | -/* $Id$ | |
2393 | - * md.h: High speed xor_block operation for RAID4/5 | |
2394 | - * | |
2395 | - */ | |
2396 | - | |
2397 | -#ifndef __ASM_MD_H | |
2398 | -#define __ASM_MD_H | |
2399 | - | |
2400 | -/* #define HAVE_ARCH_XORBLOCK */ | |
2401 | - | |
2402 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
2403 | - | |
2404 | -#endif /* __ASM_MD_H */ | |
2405 | --- linux/include/asm-m68k/md.h.orig Fri May 8 09:15:22 1998 | |
2406 | +++ linux/include/asm-m68k/md.h Fri Nov 23 11:18:18 2001 | |
2407 | @@ -1,13 +0,0 @@ | |
2408 | -/* $Id$ | |
2409 | - * md.h: High speed xor_block operation for RAID4/5 | |
2410 | - * | |
2411 | - */ | |
2412 | - | |
2413 | -#ifndef __ASM_MD_H | |
2414 | -#define __ASM_MD_H | |
2415 | - | |
2416 | -/* #define HAVE_ARCH_XORBLOCK */ | |
2417 | - | |
2418 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
2419 | - | |
2420 | -#endif /* __ASM_MD_H */ | |
2421 | --- linux/include/asm-sparc/md.h.orig Tue Jan 13 00:15:54 1998 | |
2422 | +++ linux/include/asm-sparc/md.h Fri Nov 23 11:18:18 2001 | |
2423 | @@ -1,13 +0,0 @@ | |
2424 | -/* $Id$ | |
2425 | - * md.h: High speed xor_block operation for RAID4/5 | |
2426 | - * | |
2427 | - */ | |
2428 | - | |
2429 | -#ifndef __ASM_MD_H | |
2430 | -#define __ASM_MD_H | |
2431 | - | |
2432 | -/* #define HAVE_ARCH_XORBLOCK */ | |
2433 | - | |
2434 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
2435 | - | |
2436 | -#endif /* __ASM_MD_H */ | |
2437 | --- linux/include/asm-ppc/md.h.orig Wed Oct 27 02:53:42 1999 | |
2438 | +++ linux/include/asm-ppc/md.h Fri Nov 23 11:18:18 2001 | |
2439 | @@ -1,13 +0,0 @@ | |
2440 | -/* $Id$ | |
2441 | - * md.h: High speed xor_block operation for RAID4/5 | |
2442 | - * | |
2443 | - */ | |
2444 | - | |
2445 | -#ifndef __ASM_MD_H | |
2446 | -#define __ASM_MD_H | |
2447 | - | |
2448 | -/* #define HAVE_ARCH_XORBLOCK */ | |
2449 | - | |
2450 | -#define MD_XORBLOCK_ALIGNMENT sizeof(long) | |
2451 | - | |
2452 | -#endif /* __ASM_MD_H */ | |
2453 | --- linux/include/asm-sparc64/md.h.orig Tue Jan 13 00:15:58 1998 | |
2454 | +++ linux/include/asm-sparc64/md.h Fri Nov 23 11:18:18 2001 | |
2455 | @@ -1,91 +0,0 @@ | |
2456 | -/* $Id$ | |
2457 | - * md.h: High speed xor_block operation for RAID4/5 | |
2458 | - * utilizing the UltraSparc Visual Instruction Set. | |
2459 | - * | |
2460 | - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | |
2461 | - */ | |
2462 | - | |
2463 | -#ifndef __ASM_MD_H | |
2464 | -#define __ASM_MD_H | |
2465 | - | |
2466 | -#include <asm/head.h> | |
2467 | -#include <asm/asi.h> | |
2468 | - | |
2469 | -#define HAVE_ARCH_XORBLOCK | |
2470 | - | |
2471 | -#define MD_XORBLOCK_ALIGNMENT 64 | |
2472 | - | |
2473 | -/* void __xor_block (char *dest, char *src, long len) | |
2474 | - * { | |
2475 | - * while (len--) *dest++ ^= *src++; | |
2476 | - * } | |
2477 | - * | |
2478 | - * Requirements: | |
2479 | - * !(((long)dest | (long)src) & (MD_XORBLOCK_ALIGNMENT - 1)) && | |
2480 | - * !(len & 127) && len >= 256 | |
2481 | - */ | |
2482 | - | |
2483 | -static inline void __xor_block (char *dest, char *src, long len) | |
2484 | -{ | |
2485 | - __asm__ __volatile__ (" | |
2486 | - wr %%g0, %3, %%fprs | |
2487 | - wr %%g0, %4, %%asi | |
2488 | - membar #LoadStore|#StoreLoad|#StoreStore | |
2489 | - sub %2, 128, %2 | |
2490 | - ldda [%0] %4, %%f0 | |
2491 | - ldda [%1] %4, %%f16 | |
2492 | -1: ldda [%0 + 64] %%asi, %%f32 | |
2493 | - fxor %%f0, %%f16, %%f16 | |
2494 | - fxor %%f2, %%f18, %%f18 | |
2495 | - fxor %%f4, %%f20, %%f20 | |
2496 | - fxor %%f6, %%f22, %%f22 | |
2497 | - fxor %%f8, %%f24, %%f24 | |
2498 | - fxor %%f10, %%f26, %%f26 | |
2499 | - fxor %%f12, %%f28, %%f28 | |
2500 | - fxor %%f14, %%f30, %%f30 | |
2501 | - stda %%f16, [%0] %4 | |
2502 | - ldda [%1 + 64] %%asi, %%f48 | |
2503 | - ldda [%0 + 128] %%asi, %%f0 | |
2504 | - fxor %%f32, %%f48, %%f48 | |
2505 | - fxor %%f34, %%f50, %%f50 | |
2506 | - add %0, 128, %0 | |
2507 | - fxor %%f36, %%f52, %%f52 | |
2508 | - add %1, 128, %1 | |
2509 | - fxor %%f38, %%f54, %%f54 | |
2510 | - subcc %2, 128, %2 | |
2511 | - fxor %%f40, %%f56, %%f56 | |
2512 | - fxor %%f42, %%f58, %%f58 | |
2513 | - fxor %%f44, %%f60, %%f60 | |
2514 | - fxor %%f46, %%f62, %%f62 | |
2515 | - stda %%f48, [%0 - 64] %%asi | |
2516 | - bne,pt %%xcc, 1b | |
2517 | - ldda [%1] %4, %%f16 | |
2518 | - ldda [%0 + 64] %%asi, %%f32 | |
2519 | - fxor %%f0, %%f16, %%f16 | |
2520 | - fxor %%f2, %%f18, %%f18 | |
2521 | - fxor %%f4, %%f20, %%f20 | |
2522 | - fxor %%f6, %%f22, %%f22 | |
2523 | - fxor %%f8, %%f24, %%f24 | |
2524 | - fxor %%f10, %%f26, %%f26 | |
2525 | - fxor %%f12, %%f28, %%f28 | |
2526 | - fxor %%f14, %%f30, %%f30 | |
2527 | - stda %%f16, [%0] %4 | |
2528 | - ldda [%1 + 64] %%asi, %%f48 | |
2529 | - membar #Sync | |
2530 | - fxor %%f32, %%f48, %%f48 | |
2531 | - fxor %%f34, %%f50, %%f50 | |
2532 | - fxor %%f36, %%f52, %%f52 | |
2533 | - fxor %%f38, %%f54, %%f54 | |
2534 | - fxor %%f40, %%f56, %%f56 | |
2535 | - fxor %%f42, %%f58, %%f58 | |
2536 | - fxor %%f44, %%f60, %%f60 | |
2537 | - fxor %%f46, %%f62, %%f62 | |
2538 | - stda %%f48, [%0 + 64] %%asi | |
2539 | - membar #Sync|#StoreStore|#StoreLoad | |
2540 | - wr %%g0, 0, %%fprs | |
2541 | - " : : | |
2542 | - "r" (dest), "r" (src), "r" (len), "i" (FPRS_FEF), "i" (ASI_BLK_P) : | |
2543 | - "cc", "memory"); | |
2544 | -} | |
2545 | - | |
2546 | -#endif /* __ASM_MD_H */ | |
2547 | --- linux/drivers/block/Config.in.orig Fri Nov 23 11:17:44 2001 | |
2548 | +++ linux/drivers/block/Config.in Fri Nov 23 11:18:18 2001 | |
2549 | @@ -103,10 +103,13 @@ | |
2550 | fi | |
2551 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
2552 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
2553 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
2554 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
2555 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
2556 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
2557 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
2558 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
2559 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
2560 | fi | |
2561 | if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
2562 | bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
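With these Config.in additions, a kernel built for autodetected RAID with the new personalities as modules would carry .config lines along these lines (values are only an example):

        CONFIG_BLK_DEV_MD=y
        CONFIG_AUTODETECT_RAID=y
        CONFIG_MD_LINEAR=m
        CONFIG_MD_STRIPED=m
        CONFIG_MD_MIRRORING=m
        CONFIG_MD_RAID5=m
        CONFIG_MD_TRANSLUCENT=m
        CONFIG_MD_HSM=m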
2563 | --- linux/drivers/block/Makefile.orig Mon Dec 11 01:49:41 2000 | |
2564 | +++ linux/drivers/block/Makefile Fri Nov 23 11:18:18 2001 | |
2565 | @@ -294,10 +294,28 @@ | |
2566 | endif | |
2567 | ||
2568 | ifeq ($(CONFIG_MD_RAID5),y) | |
2569 | +LX_OBJS += xor.o | |
2570 | L_OBJS += raid5.o | |
2571 | else | |
2572 | ifeq ($(CONFIG_MD_RAID5),m) | |
2573 | + LX_OBJS += xor.o | |
2574 | M_OBJS += raid5.o | |
2575 | + endif | |
2576 | +endif | |
2577 | + | |
2578 | +ifeq ($(CONFIG_MD_TRANSLUCENT),y) | |
2579 | +L_OBJS += translucent.o | |
2580 | +else | |
2581 | + ifeq ($(CONFIG_MD_TRANSLUCENT),m) | |
2582 | + M_OBJS += translucent.o | |
2583 | + endif | |
2584 | +endif | |
2585 | + | |
2586 | +ifeq ($(CONFIG_MD_HSM),y) | |
2587 | +L_OBJS += hsm.o | |
2588 | +else | |
2589 | + ifeq ($(CONFIG_MD_HSM),m) | |
2590 | + M_OBJS += hsm.o | |
2591 | endif | |
2592 | endif | |
2593 | ||
2594 | --- linux/drivers/block/genhd.c.orig Fri Nov 23 11:17:44 2001 | |
2595 | +++ linux/drivers/block/genhd.c Fri Nov 23 11:18:18 2001 | |
2596 | @@ -28,6 +28,7 @@ | |
2597 | #include <linux/string.h> | |
2598 | #include <linux/blk.h> | |
2599 | #include <linux/init.h> | |
2600 | +#include <linux/raid/md.h> | |
2601 | ||
2602 | #ifdef CONFIG_ARCH_S390 | |
2603 | #include <asm/dasd.h> | |
2604 | @@ -1786,6 +1787,9 @@ | |
2605 | else | |
2606 | #endif | |
2607 | rd_load(); | |
2608 | +#endif | |
2609 | +#ifdef CONFIG_BLK_DEV_MD | |
2610 | + autodetect_raid(); | |
2611 | #endif | |
2612 | #ifdef CONFIG_MD_BOOT | |
2613 | md_setup_drive(); | |
2614 | --- linux/drivers/block/hsm.c.orig Fri Nov 23 11:18:18 2001 | |
2615 | +++ linux/drivers/block/hsm.c Fri Nov 23 11:18:18 2001 | |
2616 | @@ -0,0 +1,840 @@ | |
2617 | +/* | |
2618 | + hsm.c : HSM RAID driver for Linux | |
2619 | + Copyright (C) 1998 Ingo Molnar | |
2620 | + | |
2621 | + HSM mode management functions. | |
2622 | + | |
2623 | + This program is free software; you can redistribute it and/or modify | |
2624 | + it under the terms of the GNU General Public License as published by | |
2625 | + the Free Software Foundation; either version 2, or (at your option) | |
2626 | + any later version. | |
2627 | + | |
2628 | + You should have received a copy of the GNU General Public License | |
2629 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
2630 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
2631 | +*/ | |
2632 | + | |
2633 | +#include <linux/module.h> | |
2634 | + | |
2635 | +#include <linux/raid/md.h> | |
2636 | +#include <linux/malloc.h> | |
2637 | + | |
2638 | +#include <linux/raid/hsm.h> | |
2639 | +#include <linux/blk.h> | |
2640 | + | |
2641 | +#define MAJOR_NR MD_MAJOR | |
2642 | +#define MD_DRIVER | |
2643 | +#define MD_PERSONALITY | |
2644 | + | |
2645 | + | |
2646 | +#define DEBUG_HSM 1 | |
2647 | + | |
2648 | +#if DEBUG_HSM | |
2649 | +#define dprintk(x,y...) printk(x,##y) | |
2650 | +#else | |
2651 | +#define dprintk(x,y...) do { } while (0) | |
2652 | +#endif | |
2653 | + | |
2654 | +void print_bh(struct buffer_head *bh) | |
2655 | +{ | |
2656 | + dprintk("bh %p: %lx %lx %x %x %lx %p %lx %p %x %p %x %lx\n", bh, | |
2657 | + bh->b_blocknr, bh->b_size, bh->b_dev, bh->b_rdev, | |
2658 | + bh->b_rsector, bh->b_this_page, bh->b_state, | |
2659 | + bh->b_next_free, bh->b_count, bh->b_data, | |
2660 | + bh->b_list, bh->b_flushtime | |
2661 | + ); | |
2662 | +} | |
2663 | + | |
2664 | +static int check_bg (pv_t *pv, pv_block_group_t * bg) | |
2665 | +{ | |
2666 | + int i, free = 0; | |
2667 | + | |
2668 | + dprintk("checking bg ...\n"); | |
2669 | + | |
2670 | + for (i = 0; i < pv->pv_sb->pv_bg_size-1; i++) { | |
2671 | + if (pv_pptr_free(bg->blocks + i)) { | |
2672 | + free++; | |
2673 | + if (test_bit(i, bg->used_bitmap)) { | |
2674 | + printk("hm, bit %d set?\n", i); | |
2675 | + } | |
2676 | + } else { | |
2677 | + if (!test_bit(i, bg->used_bitmap)) { | |
2678 | + printk("hm, bit %d not set?\n", i); | |
2679 | + } | |
2680 | + } | |
2681 | + } | |
2682 | + dprintk("%d free blocks in bg ...\n", free); | |
2683 | + return free; | |
2684 | +} | |
2685 | + | |
2686 | +static void get_bg (pv_t *pv, pv_bg_desc_t *desc, int nr) | |
2687 | +{ | |
2688 | + unsigned int bg_pos = nr * pv->pv_sb->pv_bg_size + 2; | |
2689 | + struct buffer_head *bh; | |
2690 | + | |
2691 | + dprintk("... getting BG at %u ...\n", bg_pos); | |
2692 | + | |
2693 | + bh = bread (pv->dev, bg_pos, HSM_BLOCKSIZE); | |
2694 | + if (!bh) { | |
2695 | + MD_BUG(); | |
2696 | + return; | |
2697 | + } | |
2698 | + desc->bg = (pv_block_group_t *) bh->b_data; | |
2699 | + desc->free_blocks = check_bg(pv, desc->bg); | |
2700 | +} | |
2701 | + | |
2702 | +static int find_free_block (lv_t *lv, pv_t *pv, pv_bg_desc_t *desc, int nr, | |
2703 | + unsigned int lblock, lv_lptr_t * index) | |
2704 | +{ | |
2705 | + int i; | |
2706 | + | |
2707 | + for (i = 0; i < pv->pv_sb->pv_bg_size-1; i++) { | |
2708 | + pv_pptr_t * bptr = desc->bg->blocks + i; | |
2709 | + if (pv_pptr_free(bptr)) { | |
2710 | + unsigned int bg_pos = nr * pv->pv_sb->pv_bg_size + 2; | |
2711 | + | |
2712 | + if (test_bit(i, desc->bg->used_bitmap)) { | |
2713 | + MD_BUG(); | |
2714 | + continue; | |
2715 | + } | |
2716 | + bptr->u.used.owner.log_id = lv->log_id; | |
2717 | + bptr->u.used.owner.log_index = lblock; | |
2718 | + index->data.phys_nr = pv->phys_nr; | |
2719 | + index->data.phys_block = bg_pos + i + 1; | |
2720 | + set_bit(i, desc->bg->used_bitmap); | |
2721 | + desc->free_blocks--; | |
2722 | + dprintk(".....free blocks left in bg %p: %d\n", | |
2723 | + desc->bg, desc->free_blocks); | |
2724 | + return 0; | |
2725 | + } | |
2726 | + } | |
2727 | + return -ENOSPC; | |
2728 | +} | |
2729 | + | |
2730 | +static int __get_free_block (lv_t *lv, pv_t *pv, | |
2731 | + unsigned int lblock, lv_lptr_t * index) | |
2732 | +{ | |
2733 | + int i; | |
2734 | + | |
2735 | + dprintk("trying to get free block for lblock %d ...\n", lblock); | |
2736 | + | |
2737 | + for (i = 0; i < pv->pv_sb->pv_block_groups; i++) { | |
2738 | + pv_bg_desc_t *desc = pv->bg_array + i; | |
2739 | + | |
2740 | + dprintk("looking at desc #%d (%p)...\n", i, desc->bg); | |
2741 | + if (!desc->bg) | |
2742 | + get_bg(pv, desc, i); | |
2743 | + | |
2744 | + if (desc->bg && desc->free_blocks) | |
2745 | + return find_free_block(lv, pv, desc, i, | |
2746 | + lblock, index); | |
2747 | + } | |
2748 | + dprintk("hsm: pv %s full!\n", partition_name(pv->dev)); | |
2749 | + return -ENOSPC; | |
2750 | +} | |
2751 | + | |
2752 | +static int get_free_block (lv_t *lv, unsigned int lblock, lv_lptr_t * index) | |
2753 | +{ | |
2754 | + int err; | |
2755 | + | |
2756 | + if (!lv->free_indices) | |
2757 | + return -ENOSPC; | |
2758 | + | |
2759 | + /* fix me */ | |
2760 | + err = __get_free_block(lv, lv->vg->pv_array + 0, lblock, index); | |
2761 | + | |
2762 | + if (err || !index->data.phys_block) { | |
2763 | + MD_BUG(); | |
2764 | + return -ENOSPC; | |
2765 | + } | |
2766 | + | |
2767 | + lv->free_indices--; | |
2768 | + | |
2769 | + return 0; | |
2770 | +} | |
2771 | + | |
2772 | +/* | |
2773 | + * fix me: wordsize assumptions ... | |
2774 | + */ | |
2775 | +#define INDEX_BITS 8 | |
2776 | +#define INDEX_DEPTH (32/INDEX_BITS) | |
2777 | +#define INDEX_MASK ((1<<INDEX_BITS) - 1) | |
2778 | + | |
2779 | +static void print_index_list (lv_t *lv, lv_lptr_t *index) | |
2780 | +{ | |
2781 | + lv_lptr_t *tmp; | |
2782 | + int i; | |
2783 | + | |
2784 | + dprintk("... block <%u,%u,%x> [.", index->data.phys_nr, | |
2785 | + index->data.phys_block, index->cpu_addr); | |
2786 | + | |
2787 | + tmp = index_child(index); | |
2788 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
2789 | + if (index_block(lv, tmp)) | |
2790 | + dprintk("(%d->%d)", i, index_block(lv, tmp)); | |
2791 | + tmp++; | |
2792 | + } | |
2793 | + dprintk(".]\n"); | |
2794 | +} | |
2795 | + | |
2796 | +static int read_index_group (lv_t *lv, lv_lptr_t *index) | |
2797 | +{ | |
2798 | + lv_lptr_t *index_group, *tmp; | |
2799 | + struct buffer_head *bh; | |
2800 | + int i; | |
2801 | + | |
2802 | + dprintk("reading index group <%s:%d>\n", | |
2803 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
2804 | + | |
2805 | + bh = bread(index_dev(lv, index), index_block(lv, index), HSM_BLOCKSIZE); | |
2806 | + if (!bh) { | |
2807 | + MD_BUG(); | |
2808 | + return -EIO; | |
2809 | + } | |
2810 | + if (!buffer_uptodate(bh)) | |
2811 | + MD_BUG(); | |
2812 | + | |
2813 | + index_group = (lv_lptr_t *) bh->b_data; | |
2814 | + tmp = index_group; | |
2815 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
2816 | + if (index_block(lv, tmp)) { | |
2817 | + dprintk("index group has BLOCK %d, non-present.\n", i); | |
2818 | + tmp->cpu_addr = 0; | |
2819 | + } | |
2820 | + tmp++; | |
2821 | + } | |
2822 | + index->cpu_addr = ptr_to_cpuaddr(index_group); | |
2823 | + | |
2824 | + dprintk("have read index group %p at block %d.\n", | |
2825 | + index_group, index_block(lv, index)); | |
2826 | + print_index_list(lv, index); | |
2827 | + | |
2828 | + return 0; | |
2829 | +} | |
2830 | + | |
2831 | +static int alloc_index_group (lv_t *lv, unsigned int lblock, lv_lptr_t * index) | |
2832 | +{ | |
2833 | + struct buffer_head *bh; | |
2834 | + lv_lptr_t * index_group; | |
2835 | + | |
2836 | + if (get_free_block(lv, lblock, index)) | |
2837 | + return -ENOSPC; | |
2838 | + | |
2839 | + dprintk("creating block for index group <%s:%d>\n", | |
2840 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
2841 | + | |
2842 | + bh = getblk(index_dev(lv, index), | |
2843 | + index_block(lv, index), HSM_BLOCKSIZE); | |
2844 | + | |
2845 | + index_group = (lv_lptr_t *) bh->b_data; | |
2846 | + md_clear_page(index_group); | |
2847 | + mark_buffer_uptodate(bh, 1); | |
2848 | + | |
2849 | + index->cpu_addr = ptr_to_cpuaddr(index_group); | |
2850 | + | |
2851 | + dprintk("allocated index group %p at block %d.\n", | |
2852 | + index_group, index_block(lv, index)); | |
2853 | + return 0; | |
2854 | +} | |
2855 | + | |
2856 | +static lv_lptr_t * alloc_fixed_index (lv_t *lv, unsigned int lblock) | |
2857 | +{ | |
2858 | + lv_lptr_t * index = index_child(&lv->root_index); | |
2859 | + int idx, l; | |
2860 | + | |
2861 | + for (l = INDEX_DEPTH-1; l >= 0; l--) { | |
2862 | + idx = (lblock >> (INDEX_BITS*l)) & INDEX_MASK; | |
2863 | + index += idx; | |
2864 | + if (!l) | |
2865 | + break; | |
2866 | + if (!index_present(index)) { | |
2867 | + dprintk("no group, level %u, pos %u\n", l, idx); | |
2868 | + if (alloc_index_group(lv, lblock, index)) | |
2869 | + return NULL; | |
2870 | + } | |
2871 | + index = index_child(index); | |
2872 | + } | |
2873 | + if (!index_block(lv,index)) { | |
2874 | + dprintk("no data, pos %u\n", idx); | |
2875 | + if (get_free_block(lv, lblock, index)) | |
2876 | + return NULL; | |
2877 | + return index; | |
2878 | + } | |
2879 | + MD_BUG(); | |
2880 | + return index; | |
2881 | +} | |
2882 | + | |
2883 | +static lv_lptr_t * find_index (lv_t *lv, unsigned int lblock) | |
2884 | +{ | |
2885 | + lv_lptr_t * index = index_child(&lv->root_index); | |
2886 | + int idx, l; | |
2887 | + | |
2888 | + for (l = INDEX_DEPTH-1; l >= 0; l--) { | |
2889 | + idx = (lblock >> (INDEX_BITS*l)) & INDEX_MASK; | |
2890 | + index += idx; | |
2891 | + if (!l) | |
2892 | + break; | |
2893 | + if (index_free(index)) | |
2894 | + return NULL; | |
2895 | + if (!index_present(index)) | |
2896 | + read_index_group(lv, index); | |
2897 | + if (!index_present(index)) { | |
2898 | + MD_BUG(); | |
2899 | + return NULL; | |
2900 | + } | |
2901 | + index = index_child(index); | |
2902 | + } | |
2903 | + if (!index_block(lv,index)) | |
2904 | + return NULL; | |
2905 | + return index; | |
2906 | +} | |
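find_index() and alloc_fixed_index() walk a fixed four-level radix tree: each level consumes INDEX_BITS = 8 bits of the 32-bit logical block number, most significant bits first, selecting one of 256 lv_lptr_t slots per index group. For example, lblock = 0x01020304 resolves as:

        level 3 (root group):    (lblock >> 24) & 0xFF = 0x01
        level 2:                 (lblock >> 16) & 0xFF = 0x02
        level 1:                 (lblock >>  8) & 0xFF = 0x03
        level 0 (data pointer):   lblock        & 0xFF = 0x04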
2907 | + | |
2908 | +static int read_root_index(lv_t *lv) | |
2909 | +{ | |
2910 | + int err; | |
2911 | + lv_lptr_t *index = &lv->root_index; | |
2912 | + | |
2913 | + if (!index_block(lv, index)) { | |
2914 | + printk("LV has no root index yet, creating.\n"); | |
2915 | + | |
2916 | + err = alloc_index_group (lv, 0, index); | |
2917 | + if (err) { | |
2918 | + printk("could not create index group, err:%d\n", err); | |
2919 | + return err; | |
2920 | + } | |
2921 | + lv->vg->vg_sb->lv_array[lv->log_id].lv_root_idx = | |
2922 | + lv->root_index.data; | |
2923 | + } else { | |
2924 | + printk("LV already has a root index.\n"); | |
2925 | + printk("... at <%s:%d>.\n", | |
2926 | + partition_name(index_dev(lv, index)), | |
2927 | + index_block(lv, index)); | |
2928 | + | |
2929 | + read_index_group(lv, index); | |
2930 | + } | |
2931 | + return 0; | |
2932 | +} | |
2933 | + | |
2934 | +static int init_pv(pv_t *pv) | |
2935 | +{ | |
2936 | + struct buffer_head *bh; | |
2937 | + pv_sb_t *pv_sb; | |
2938 | + | |
2939 | + bh = bread (pv->dev, 0, HSM_BLOCKSIZE); | |
2940 | + if (!bh) { | |
2941 | + MD_BUG(); | |
2942 | + return -1; | |
2943 | + } | |
2944 | + | |
2945 | + pv_sb = (pv_sb_t *) bh->b_data; | |
2946 | + pv->pv_sb = pv_sb; | |
2947 | + | |
2948 | + if (pv_sb->pv_magic != HSM_PV_SB_MAGIC) { | |
2949 | + printk("%s is not a PV, has magic %x instead of %x!\n", | |
2950 | + partition_name(pv->dev), pv_sb->pv_magic, | |
2951 | + HSM_PV_SB_MAGIC); | |
2952 | + return -1; | |
2953 | + } | |
2954 | + printk("%s detected as a valid PV (#%d).\n", partition_name(pv->dev), | |
2955 | + pv->phys_nr); | |
2956 | + printk("... created under HSM version %d.%d.%d, at %x.\n", | |
2957 | + pv_sb->pv_major, pv_sb->pv_minor, pv_sb->pv_patch, pv_sb->pv_ctime); | |
2958 | + printk("... total # of blocks: %d (%d left unallocated).\n", | |
2959 | + pv_sb->pv_total_size, pv_sb->pv_blocks_left); | |
2960 | + | |
2961 | + printk("... block size: %d bytes.\n", pv_sb->pv_block_size); | |
2962 | + printk("... block descriptor size: %d bytes.\n", pv_sb->pv_pptr_size); | |
2963 | + printk("... block group size: %d blocks.\n", pv_sb->pv_bg_size); | |
2964 | + printk("... # of block groups: %d.\n", pv_sb->pv_block_groups); | |
2965 | + | |
2966 | + if (pv_sb->pv_block_groups*sizeof(pv_bg_desc_t) > PAGE_SIZE) { | |
2967 | + MD_BUG(); | |
2968 | + return 1; | |
2969 | + } | |
2970 | + pv->bg_array = (pv_bg_desc_t *)__get_free_page(GFP_KERNEL); | |
2971 | + if (!pv->bg_array) { | |
2972 | + MD_BUG(); | |
2973 | + return 1; | |
2974 | + } | |
2975 | + memset(pv->bg_array, 0, PAGE_SIZE); | |
2976 | + | |
2977 | + return 0; | |
2978 | +} | |
2979 | + | |
2980 | +static int free_pv(pv_t *pv) | |
2981 | +{ | |
2982 | + struct buffer_head *bh; | |
2983 | + | |
2984 | + dprintk("freeing PV %d ...\n", pv->phys_nr); | |
2985 | + | |
2986 | + if (pv->bg_array) { | |
2987 | + int i; | |
2988 | + | |
2989 | + dprintk(".... freeing BGs ...\n"); | |
2990 | + for (i = 0; i < pv->pv_sb->pv_block_groups; i++) { | |
2991 | + unsigned int bg_pos = i * pv->pv_sb->pv_bg_size + 2; | |
2992 | + pv_bg_desc_t *desc = pv->bg_array + i; | |
2993 | + | |
2994 | + if (desc->bg) { | |
2995 | + dprintk(".... freeing BG %d ...\n", i); | |
2996 | + bh = getblk (pv->dev, bg_pos, HSM_BLOCKSIZE); | |
2997 | + mark_buffer_dirty(bh, 1); | |
2998 | + brelse(bh); | |
2999 | + brelse(bh); | |
3000 | + } | |
3001 | + } | |
3002 | + free_page((unsigned long)pv->bg_array); | |
3003 | + } else | |
3004 | + MD_BUG(); | |
3005 | + | |
3006 | + bh = getblk (pv->dev, 0, HSM_BLOCKSIZE); | |
3007 | + if (!bh) { | |
3008 | + MD_BUG(); | |
3009 | + return -1; | |
3010 | + } | |
3011 | + mark_buffer_dirty(bh, 1); | |
3012 | + brelse(bh); | |
3013 | + brelse(bh); | |
3014 | + | |
3015 | + return 0; | |
3016 | +} | |
3017 | + | |
3018 | +struct semaphore hsm_sem = MUTEX; | |
3019 | + | |
3020 | +#define HSM_SECTORS (HSM_BLOCKSIZE/512) | |
3021 | + | |
3022 | +static int hsm_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
3023 | + unsigned long *rsector, unsigned long bsectors) | |
3024 | +{ | |
3025 | + lv_t *lv = kdev_to_lv(dev); | |
3026 | + lv_lptr_t *index; | |
3027 | + unsigned int lblock = *rsector / HSM_SECTORS; | |
3028 | + unsigned int offset = *rsector % HSM_SECTORS; | |
3029 | + int err = -EIO; | |
3030 | + | |
3031 | + if (!lv) { | |
3032 | + printk("HSM: md%d not a Logical Volume!\n", mdidx(mddev)); | |
3033 | + goto out; | |
3034 | + } | |
3035 | + if (offset + bsectors > HSM_SECTORS) { | |
3036 | + MD_BUG(); | |
3037 | + goto out; | |
3038 | + } | |
3039 | + down(&hsm_sem); | |
3040 | + index = find_index(lv, lblock); | |
3041 | + if (!index) { | |
3042 | + printk("no block %u yet ... allocating\n", lblock); | |
3043 | + index = alloc_fixed_index(lv, lblock); | |
3044 | + } | |
3045 | + | |
3046 | + err = 0; | |
3047 | + | |
3048 | + printk(" %u <%s : %ld(%ld)> -> ", lblock, | |
3049 | + partition_name(*rdev), *rsector, bsectors); | |
3050 | + | |
3051 | + *rdev = index_dev(lv, index); | |
3052 | + *rsector = index_block(lv, index) * HSM_SECTORS + offset; | |
3053 | + | |
3054 | + printk(" <%s : %ld> %u\n", | |
3055 | + partition_name(*rdev), *rsector, index_block(lv, index)); | |
3056 | + | |
3057 | + up(&hsm_sem); | |
3058 | +out: | |
3059 | + return err; | |
3060 | +} | |
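hsm_map() first converts the 512-byte sector offset handed down by the block layer into a 4 KB logical block plus an in-block offset: with HSM_BLOCKSIZE = 4096 we have HSM_SECTORS = 8, so for example *rsector = 4100 gives lblock = 4100 / 8 = 512 and offset = 4100 % 8 = 4. After the index lookup (allocating the block on first touch), the request is redirected to sector index_block(lv, index) * 8 + 4 on the physical volume named by index_dev().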
3061 | + | |
3062 | +static void free_index (lv_t *lv, lv_lptr_t * index) | |
3063 | +{ | |
3064 | + struct buffer_head *bh; | |
3065 | + | |
3066 | + printk("trying to get cached block for index group <%s:%d>\n", | 
3067 | + partition_name(index_dev(lv, index)), index_block(lv, index)); | |
3068 | + | |
3069 | + bh = getblk(index_dev(lv, index), index_block(lv, index),HSM_BLOCKSIZE); | |
3070 | + | |
3071 | + printk("....FREEING "); | |
3072 | + print_index_list(lv, index); | |
3073 | + | |
3074 | + if (bh) { | |
3075 | + if (!buffer_uptodate(bh)) | |
3076 | + MD_BUG(); | |
3077 | + if ((lv_lptr_t *)bh->b_data != index_child(index)) { | |
3078 | + printk("huh? b_data is %p, index content is %p.\n", | |
3079 | + bh->b_data, index_child(index)); | |
3080 | + } else | |
3081 | + printk("good, b_data == index content == %p.\n", | |
3082 | + index_child(index)); | |
3083 | + printk("b_count == %d, writing.\n", bh->b_count); | |
3084 | + mark_buffer_dirty(bh, 1); | |
3085 | + brelse(bh); | |
3086 | + brelse(bh); | |
3087 | + printk("done.\n"); | |
3088 | + } else { | |
3089 | + printk("FAILED!\n"); | |
3090 | + } | |
3091 | + print_index_list(lv, index); | |
3092 | + index_child(index) = NULL; | |
3093 | +} | |
3094 | + | |
3095 | +static void free_index_group (lv_t *lv, int level, lv_lptr_t * index_0) | |
3096 | +{ | |
3097 | + char dots [3*8]; | |
3098 | + lv_lptr_t * index; | |
3099 | + int i, nr_dots; | |
3100 | + | |
3101 | + nr_dots = (INDEX_DEPTH-level)*3; | |
3102 | + memcpy(dots,"...............",nr_dots); | |
3103 | + dots[nr_dots] = 0; | |
3104 | + | |
3105 | + dprintk("%s level %d index group block:\n", dots, level); | |
3106 | + | |
3107 | + | |
3108 | + index = index_0; | |
3109 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
3110 | + if (index->data.phys_block) { | |
3111 | + dprintk("%s block <%u,%u,%x>\n", dots, | |
3112 | + index->data.phys_nr, | |
3113 | + index->data.phys_block, | |
3114 | + index->cpu_addr); | |
3115 | + if (level && index_present(index)) { | |
3116 | + dprintk("%s==> deeper one level\n", dots); | |
3117 | + free_index_group(lv, level-1, | |
3118 | + index_child(index)); | |
3119 | + dprintk("%s freeing index group block %p ...", | |
3120 | + dots, index_child(index)); | |
3121 | + free_index(lv, index); | |
3122 | + } | |
3123 | + } | |
3124 | + index++; | |
3125 | + } | |
3126 | + dprintk("%s DONE: level %d index group block.\n", dots, level); | |
3127 | +} | |
3128 | + | |
3129 | +static void free_lv_indextree (lv_t *lv) | |
3130 | +{ | |
3131 | + dprintk("freeing LV %d ...\n", lv->log_id); | |
3132 | + dprintk("..root index: %p\n", index_child(&lv->root_index)); | |
3133 | + dprintk("..INDEX TREE:\n"); | |
3134 | + free_index_group(lv, INDEX_DEPTH-1, index_child(&lv->root_index)); | |
3135 | + dprintk("..freeing root index %p ...", index_child(&lv->root_index)); | |
3136 | + dprintk("root block <%u,%u,%x>\n", lv->root_index.data.phys_nr, | |
3137 | + lv->root_index.data.phys_block, lv->root_index.cpu_addr); | |
3138 | + free_index(lv, &lv->root_index); | |
3139 | + dprintk("..INDEX TREE done.\n"); | |
3140 | + fsync_dev(lv->vg->pv_array[0].dev); /* fix me */ | |
3141 | + lv->vg->vg_sb->lv_array[lv->log_id].lv_free_indices = lv->free_indices; | |
3142 | +} | |
3143 | + | |
3144 | +static void print_index_group (lv_t *lv, int level, lv_lptr_t * index_0) | |
3145 | +{ | |
3146 | + char dots [3*5]; | |
3147 | + lv_lptr_t * index; | |
3148 | + int i, nr_dots; | |
3149 | + | |
3150 | + nr_dots = (INDEX_DEPTH-level)*3; | |
3151 | + memcpy(dots,"...............",nr_dots); | |
3152 | + dots[nr_dots] = 0; | |
3153 | + | |
3154 | + dprintk("%s level %d index group block:\n", dots, level); | |
3155 | + | |
3156 | + | |
3157 | + for (i = 0; i < HSM_LPTRS_PER_BLOCK; i++) { | |
3158 | + index = index_0 + i; | |
3159 | + if (index->data.phys_block) { | |
3160 | + dprintk("%s block <%u,%u,%x>\n", dots, | |
3161 | + index->data.phys_nr, | |
3162 | + index->data.phys_block, | |
3163 | + index->cpu_addr); | |
3164 | + if (level && index_present(index)) { | |
3165 | + dprintk("%s==> deeper one level\n", dots); | |
3166 | + print_index_group(lv, level-1, | |
3167 | + index_child(index)); | |
3168 | + } | |
3169 | + } | |
3170 | + } | |
3171 | + dprintk("%s DONE: level %d index group block.\n", dots, level); | |
3172 | +} | |
3173 | + | |
3174 | +static void print_lv (lv_t *lv) | |
3175 | +{ | |
3176 | + dprintk("printing LV %d ...\n", lv->log_id); | |
3177 | + dprintk("..root index: %p\n", index_child(&lv->root_index)); | |
3178 | + dprintk("..INDEX TREE:\n"); | |
3179 | + print_index_group(lv, INDEX_DEPTH-1, index_child(&lv->root_index)); | |
3180 | + dprintk("..INDEX TREE done.\n"); | |
3181 | +} | |
3182 | + | |
3183 | +static int map_lv (lv_t *lv) | |
3184 | +{ | |
3185 | + kdev_t dev = lv->dev; | |
3186 | + unsigned int nr = MINOR(dev); | |
3187 | + mddev_t *mddev = lv->vg->mddev; | |
3188 | + | |
3189 | + if (MAJOR(dev) != MD_MAJOR) { | |
3190 | + MD_BUG(); | |
3191 | + return -1; | |
3192 | + } | |
3193 | + if (kdev_to_mddev(dev)) { | |
3194 | + MD_BUG(); | |
3195 | + return -1; | |
3196 | + } | |
3197 | + md_hd_struct[nr].start_sect = 0; | |
3198 | + md_hd_struct[nr].nr_sects = md_size[mdidx(mddev)] << 1; | |
3199 | + md_size[nr] = md_size[mdidx(mddev)]; | |
3200 | + add_mddev_mapping(mddev, dev, lv); | |
3201 | + | |
3202 | + return 0; | |
3203 | +} | |
3204 | + | |
3205 | +static int unmap_lv (lv_t *lv) | |
3206 | +{ | |
3207 | + kdev_t dev = lv->dev; | |
3208 | + unsigned int nr = MINOR(dev); | |
3209 | + | |
3210 | + if (MAJOR(dev) != MD_MAJOR) { | |
3211 | + MD_BUG(); | |
3212 | + return -1; | |
3213 | + } | |
3214 | + md_hd_struct[nr].start_sect = 0; | |
3215 | + md_hd_struct[nr].nr_sects = 0; | |
3216 | + md_size[nr] = 0; | |
3217 | + del_mddev_mapping(lv->vg->mddev, dev); | |
3218 | + | |
3219 | + return 0; | |
3220 | +} | |
3221 | + | |
3222 | +static int init_vg (vg_t *vg) | |
3223 | +{ | |
3224 | + int i; | |
3225 | + lv_t *lv; | |
3226 | + kdev_t dev; | |
3227 | + vg_sb_t *vg_sb; | |
3228 | + struct buffer_head *bh; | |
3229 | + lv_descriptor_t *lv_desc; | |
3230 | + | |
3231 | + /* | |
3232 | + * fix me: read all PVs and compare the SB | |
3233 | + */ | |
3234 | + dev = vg->pv_array[0].dev; | |
3235 | + bh = bread (dev, 1, HSM_BLOCKSIZE); | |
3236 | + if (!bh) { | |
3237 | + MD_BUG(); | |
3238 | + return -1; | |
3239 | + } | |
3240 | + | |
3241 | + vg_sb = (vg_sb_t *) bh->b_data; | |
3242 | + vg->vg_sb = vg_sb; | |
3243 | + | |
3244 | + if (vg_sb->vg_magic != HSM_VG_SB_MAGIC) { | |
3245 | + printk("%s is not a valid VG, has magic %x instead of %x!\n", | |
3246 | + partition_name(dev), vg_sb->vg_magic, | |
3247 | + HSM_VG_SB_MAGIC); | |
3248 | + return -1; | |
3249 | + } | |
3250 | + | |
3251 | + vg->nr_lv = 0; | |
3252 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
3253 | + unsigned int id; | |
3254 | + lv_desc = vg->vg_sb->lv_array + i; | |
3255 | + | |
3256 | + id = lv_desc->lv_id; | |
3257 | + if (!id) { | |
3258 | + printk("... LV desc %d empty\n", i); | |
3259 | + continue; | |
3260 | + } | |
3261 | + if (id >= HSM_MAX_LVS_PER_VG) { | |
3262 | + MD_BUG(); | |
3263 | + continue; | |
3264 | + } | |
3265 | + | |
3266 | + lv = vg->lv_array + id; | |
3267 | + if (lv->vg) { | |
3268 | + MD_BUG(); | |
3269 | + continue; | |
3270 | + } | |
3271 | + lv->log_id = id; | |
3272 | + lv->vg = vg; | |
3273 | + lv->max_indices = lv_desc->lv_max_indices; | |
3274 | + lv->free_indices = lv_desc->lv_free_indices; | |
3275 | + lv->root_index.data = lv_desc->lv_root_idx; | |
3276 | + lv->dev = MKDEV(MD_MAJOR, lv_desc->md_id); | |
3277 | + | |
3278 | + vg->nr_lv++; | |
3279 | + | |
3280 | + map_lv(lv); | |
3281 | + if (read_root_index(lv)) { | |
3282 | + vg->nr_lv--; | |
3283 | + unmap_lv(lv); | |
3284 | + memset(lv, 0, sizeof(*lv)); | |
3285 | + } | |
3286 | + } | |
3287 | + if (vg->nr_lv != vg_sb->nr_lvs) | |
3288 | + MD_BUG(); | |
3289 | + | |
3290 | + return 0; | |
3291 | +} | |
3292 | + | |
3293 | +static int hsm_run (mddev_t *mddev) | |
3294 | +{ | |
3295 | + int i; | |
3296 | + vg_t *vg; | |
3297 | + mdk_rdev_t *rdev; | |
3298 | + | |
3299 | + MOD_INC_USE_COUNT; | |
3300 | + | |
3301 | + vg = kmalloc (sizeof (*vg), GFP_KERNEL); | |
3302 | + if (!vg) | |
3303 | + goto out; | |
3304 | + memset(vg, 0, sizeof(*vg)); | |
3305 | + mddev->private = vg; | |
3306 | + vg->mddev = mddev; | |
3307 | + | |
3308 | + if (md_check_ordering(mddev)) { | |
3309 | + printk("hsm: disks are not ordered, aborting!\n"); | |
3310 | + goto out; | |
3311 | + } | |
3312 | + | |
3313 | + set_blocksize (mddev_to_kdev(mddev), HSM_BLOCKSIZE); | |
3314 | + | |
3315 | + vg->nr_pv = mddev->nb_dev; | |
3316 | + ITERATE_RDEV_ORDERED(mddev,rdev,i) { | |
3317 | + pv_t *pv = vg->pv_array + i; | |
3318 | + | |
3319 | + pv->dev = rdev->dev; | |
3320 | + fsync_dev (pv->dev); | |
3321 | + set_blocksize (pv->dev, HSM_BLOCKSIZE); | |
3322 | + pv->phys_nr = i; | |
3323 | + if (init_pv(pv)) | |
3324 | + goto out; | |
3325 | + } | |
3326 | + | |
3327 | + init_vg(vg); | |
3328 | + | |
3329 | + return 0; | |
3330 | + | |
3331 | +out: | |
3332 | + if (vg) { | |
3333 | + kfree(vg); | |
3334 | + mddev->private = NULL; | |
3335 | + } | |
3336 | + MOD_DEC_USE_COUNT; | |
3337 | + | |
3338 | + return 1; | |
3339 | +} | |
3340 | + | |
3341 | +static int hsm_stop (mddev_t *mddev) | |
3342 | +{ | |
3343 | + lv_t *lv; | |
3344 | + vg_t *vg; | |
3345 | + int i; | |
3346 | + | |
3347 | + vg = mddev_to_vg(mddev); | |
3348 | + | |
3349 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
3350 | + lv = vg->lv_array + i; | |
3351 | + if (!lv->log_id) | |
3352 | + continue; | |
3353 | + print_lv(lv); | |
3354 | + free_lv_indextree(lv); | |
3355 | + unmap_lv(lv); | |
3356 | + } | |
3357 | + for (i = 0; i < vg->nr_pv; i++) | |
3358 | + free_pv(vg->pv_array + i); | |
3359 | + | |
3360 | + kfree(vg); | |
3361 | + | |
3362 | + MOD_DEC_USE_COUNT; | |
3363 | + | |
3364 | + return 0; | |
3365 | +} | |
3366 | + | |
3367 | + | |
3368 | +static int hsm_status (char *page, mddev_t *mddev) | |
3369 | +{ | |
3370 | + int sz = 0, i; | |
3371 | + lv_t *lv; | |
3372 | + vg_t *vg; | |
3373 | + | |
3374 | + vg = mddev_to_vg(mddev); | |
3375 | + | |
3376 | + for (i = 0; i < HSM_MAX_LVS_PER_VG; i++) { | |
3377 | + lv = vg->lv_array + i; | |
3378 | + if (!lv->log_id) | |
3379 | + continue; | |
3380 | + sz += sprintf(page+sz, "<LV%d %d/%d blocks used> ", lv->log_id, | |
3381 | + lv->max_indices - lv->free_indices, lv->max_indices); | |
3382 | + } | |
3383 | + return sz; | |
3384 | +} | |
3385 | + | |
3386 | + | |
3387 | +static mdk_personality_t hsm_personality= | |
3388 | +{ | |
3389 | + "hsm", | |
3390 | + hsm_map, | |
3391 | + NULL, | |
3392 | + NULL, | |
3393 | + hsm_run, | |
3394 | + hsm_stop, | |
3395 | + hsm_status, | |
3396 | + NULL, | |
3397 | + 0, | |
3398 | + NULL, | |
3399 | + NULL, | |
3400 | + NULL, | |
3401 | + NULL | |
3402 | +}; | |
3403 | + | |
3404 | +#ifndef MODULE | |
3405 | + | |
3406 | +md__initfunc(void hsm_init (void)) | |
3407 | +{ | |
3408 | + register_md_personality (HSM, &hsm_personality); | |
3409 | +} | |
3410 | + | |
3411 | +#else | |
3412 | + | |
3413 | +int init_module (void) | |
3414 | +{ | |
3415 | + return (register_md_personality (HSM, &hsm_personality)); | |
3416 | +} | |
3417 | + | |
3418 | +void cleanup_module (void) | |
3419 | +{ | |
3420 | + unregister_md_personality (HSM); | |
3421 | +} | |
3422 | + | |
3423 | +#endif | |
3424 | + | |
3425 | +/* | |
3426 | + * This Linus-trick catches bugs via the linker. | |
3427 | + */ | |
3428 | + | |
3429 | +extern void __BUG__in__hsm_dot_c_1(void); | |
3430 | +extern void __BUG__in__hsm_dot_c_2(void); | |
3431 | +extern void __BUG__in__hsm_dot_c_3(void); | |
3432 | +extern void __BUG__in__hsm_dot_c_4(void); | |
3433 | +extern void __BUG__in__hsm_dot_c_5(void); | |
3434 | +extern void __BUG__in__hsm_dot_c_6(void); | |
3435 | +extern void __BUG__in__hsm_dot_c_7(void); | |
3436 | + | |
3437 | +void bugcatcher (void) | |
3438 | +{ | |
3439 | + if (sizeof(pv_block_group_t) != HSM_BLOCKSIZE) | |
3440 | + __BUG__in__hsm_dot_c_1(); | |
3441 | + if (sizeof(lv_index_block_t) != HSM_BLOCKSIZE) | |
3442 | + __BUG__in__hsm_dot_c_2(); | |
3443 | + | |
3444 | + if (sizeof(pv_sb_t) != HSM_BLOCKSIZE) | |
3445 | + __BUG__in__hsm_dot_c_4(); | |
3446 | + if (sizeof(lv_sb_t) != HSM_BLOCKSIZE) | |
3447 | + __BUG__in__hsm_dot_c_3(); | |
3448 | + if (sizeof(vg_sb_t) != HSM_BLOCKSIZE) | |
3449 | + __BUG__in__hsm_dot_c_6(); | |
3450 | + | |
3451 | + if (sizeof(lv_lptr_t) != 16) | |
3452 | + __BUG__in__hsm_dot_c_5(); | |
3453 | + if (sizeof(pv_pptr_t) != 16) | |
3454 | + __BUG__in__hsm_dot_c_6(); | |
3455 | +} | |
3456 | + | |
3457 | --- linux/drivers/block/linear.c.orig Sat Nov 8 20:39:12 1997 | |
3458 | +++ linux/drivers/block/linear.c Fri Nov 23 11:18:18 2001 | |
3459 | @@ -1,4 +1,3 @@ | |
3460 | - | |
3461 | /* | |
3462 | linear.c : Multiple Devices driver for Linux | |
3463 | Copyright (C) 1994-96 Marc ZYNGIER | |
3464 | @@ -19,186 +18,207 @@ | |
3465 | ||
3466 | #include <linux/module.h> | |
3467 | ||
3468 | -#include <linux/md.h> | |
3469 | +#include <linux/raid/md.h> | |
3470 | #include <linux/malloc.h> | |
3471 | -#include <linux/init.h> | |
3472 | ||
3473 | -#include "linear.h" | |
3474 | +#include <linux/raid/linear.h> | |
3475 | ||
3476 | #define MAJOR_NR MD_MAJOR | |
3477 | #define MD_DRIVER | |
3478 | #define MD_PERSONALITY | |
3479 | ||
3480 | -static int linear_run (int minor, struct md_dev *mddev) | |
3481 | +static int linear_run (mddev_t *mddev) | |
3482 | { | |
3483 | - int cur=0, i, size, dev0_size, nb_zone; | |
3484 | - struct linear_data *data; | |
3485 | - | |
3486 | - MOD_INC_USE_COUNT; | |
3487 | - | |
3488 | - mddev->private=kmalloc (sizeof (struct linear_data), GFP_KERNEL); | |
3489 | - data=(struct linear_data *) mddev->private; | |
3490 | - | |
3491 | - /* | |
3492 | - Find out the smallest device. This was previously done | |
3493 | - at registry time, but since it violates modularity, | |
3494 | - I moved it here... Any comment ? ;-) | |
3495 | - */ | |
3496 | - | |
3497 | - data->smallest=mddev->devices; | |
3498 | - for (i=1; i<mddev->nb_dev; i++) | |
3499 | - if (data->smallest->size > mddev->devices[i].size) | |
3500 | - data->smallest=mddev->devices+i; | |
3501 | - | |
3502 | - nb_zone=data->nr_zones= | |
3503 | - md_size[minor]/data->smallest->size + | |
3504 | - (md_size[minor]%data->smallest->size ? 1 : 0); | |
3505 | - | |
3506 | - data->hash_table=kmalloc (sizeof (struct linear_hash)*nb_zone, GFP_KERNEL); | |
3507 | - | |
3508 | - size=mddev->devices[cur].size; | |
3509 | + linear_conf_t *conf; | |
3510 | + struct linear_hash *table; | |
3511 | + mdk_rdev_t *rdev; | |
3512 | + int size, i, j, nb_zone; | |
3513 | + unsigned int curr_offset; | |
3514 | + | |
3515 | + MOD_INC_USE_COUNT; | |
3516 | + | |
3517 | + conf = kmalloc (sizeof (*conf), GFP_KERNEL); | |
3518 | + if (!conf) | |
3519 | + goto out; | |
3520 | + mddev->private = conf; | |
3521 | + | |
3522 | + if (md_check_ordering(mddev)) { | |
3523 | + printk("linear: disks are not ordered, aborting!\n"); | |
3524 | + goto out; | |
3525 | + } | |
3526 | + /* | |
3527 | + * Find the smallest device. | |
3528 | + */ | |
3529 | + | |
3530 | + conf->smallest = NULL; | |
3531 | + curr_offset = 0; | |
3532 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
3533 | + dev_info_t *disk = conf->disks + j; | |
3534 | + | |
3535 | + disk->dev = rdev->dev; | |
3536 | + disk->size = rdev->size; | |
3537 | + disk->offset = curr_offset; | |
3538 | + | |
3539 | + curr_offset += disk->size; | |
3540 | + | |
3541 | + if (!conf->smallest || (disk->size < conf->smallest->size)) | |
3542 | + conf->smallest = disk; | |
3543 | + } | |
3544 | + | |
3545 | + nb_zone = conf->nr_zones = | |
3546 | + md_size[mdidx(mddev)] / conf->smallest->size + | |
3547 | + ((md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0); | |
3548 | + | |
3549 | + conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone, | |
3550 | + GFP_KERNEL); | |
3551 | + if (!conf->hash_table) | |
3552 | + goto out; | |
3553 | + | |
3554 | + /* | |
3555 | + * Here we generate the linear hash table | |
3556 | + */ | |
3557 | + table = conf->hash_table; | |
3558 | + i = 0; | |
3559 | + size = 0; | |
3560 | + for (j = 0; j < mddev->nb_dev; j++) { | |
3561 | + dev_info_t *disk = conf->disks + j; | |
3562 | + | |
3563 | + if (size < 0) { | |
3564 | + table->dev1 = disk; | |
3565 | + table++; | |
3566 | + } | |
3567 | + size += disk->size; | |
3568 | + | |
3569 | + while (size) { | |
3570 | + table->dev0 = disk; | |
3571 | + size -= conf->smallest->size; | |
3572 | + if (size < 0) | |
3573 | + break; | |
3574 | + table->dev1 = NULL; | |
3575 | + table++; | |
3576 | + } | |
3577 | + } | |
3578 | + table->dev1 = NULL; | |
3579 | + | |
3580 | + return 0; | |
3581 | + | |
3582 | +out: | |
3583 | + if (conf) | |
3584 | + kfree(conf); | |
3585 | + MOD_DEC_USE_COUNT; | |
3586 | + return 1; | |
3587 | +} | |
3588 | + | |
3589 | +static int linear_stop (mddev_t *mddev) | |
3590 | +{ | |
3591 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
3592 | + | |
3593 | + kfree(conf->hash_table); | |
3594 | + kfree(conf); | |
3595 | ||
3596 | - i=0; | |
3597 | - while (cur<mddev->nb_dev) | |
3598 | - { | |
3599 | - data->hash_table[i].dev0=mddev->devices+cur; | |
3600 | + MOD_DEC_USE_COUNT; | |
3601 | ||
3602 | - if (size>=data->smallest->size) /* If we completely fill the slot */ | |
3603 | - { | |
3604 | - data->hash_table[i++].dev1=NULL; | |
3605 | - size-=data->smallest->size; | |
3606 | - | |
3607 | - if (!size) | |
3608 | - { | |
3609 | - if (++cur==mddev->nb_dev) continue; | |
3610 | - size=mddev->devices[cur].size; | |
3611 | - } | |
3612 | - | |
3613 | - continue; | |
3614 | - } | |
3615 | - | |
3616 | - if (++cur==mddev->nb_dev) /* Last dev, set dev1 as NULL */ | |
3617 | - { | |
3618 | - data->hash_table[i].dev1=NULL; | |
3619 | - continue; | |
3620 | - } | |
3621 | - | |
3622 | - dev0_size=size; /* Here, we use a 2nd dev to fill the slot */ | |
3623 | - size=mddev->devices[cur].size; | |
3624 | - data->hash_table[i++].dev1=mddev->devices+cur; | |
3625 | - size-=(data->smallest->size - dev0_size); | |
3626 | - } | |
3627 | - | |
3628 | - return 0; | |
3629 | -} | |
3630 | - | |
3631 | -static int linear_stop (int minor, struct md_dev *mddev) | |
3632 | -{ | |
3633 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
3634 | - | |
3635 | - kfree (data->hash_table); | |
3636 | - kfree (data); | |
3637 | - | |
3638 | - MOD_DEC_USE_COUNT; | |
3639 | - | |
3640 | - return 0; | |
3641 | + return 0; | |
3642 | } | |
3643 | ||
3644 | ||
3645 | -static int linear_map (struct md_dev *mddev, kdev_t *rdev, | |
3646 | +static int linear_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
3647 | unsigned long *rsector, unsigned long size) | |
3648 | { | |
3649 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
3650 | - struct linear_hash *hash; | |
3651 | - struct real_dev *tmp_dev; | |
3652 | - long block; | |
3653 | - | |
3654 | - block=*rsector >> 1; | |
3655 | - hash=data->hash_table+(block/data->smallest->size); | |
3656 | - | |
3657 | - if (block >= (hash->dev0->size + hash->dev0->offset)) | |
3658 | - { | |
3659 | - if (!hash->dev1) | |
3660 | - { | |
3661 | - printk ("linear_map : hash->dev1==NULL for block %ld\n", block); | |
3662 | - return (-1); | |
3663 | - } | |
3664 | - | |
3665 | - tmp_dev=hash->dev1; | |
3666 | - } | |
3667 | - else | |
3668 | - tmp_dev=hash->dev0; | |
3669 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
3670 | + struct linear_hash *hash; | |
3671 | + dev_info_t *tmp_dev; | |
3672 | + long block; | |
3673 | + | |
3674 | + block = *rsector >> 1; | |
3675 | + hash = conf->hash_table + (block / conf->smallest->size); | |
3676 | + | |
3677 | + if (block >= (hash->dev0->size + hash->dev0->offset)) | |
3678 | + { | |
3679 | + if (!hash->dev1) | |
3680 | + { | |
3681 | + printk ("linear_map : hash->dev1==NULL for block %ld\n", | |
3682 | + block); | |
3683 | + return -1; | |
3684 | + } | |
3685 | + tmp_dev = hash->dev1; | |
3686 | + } else | |
3687 | + tmp_dev = hash->dev0; | |
3688 | ||
3689 | - if (block >= (tmp_dev->size + tmp_dev->offset) || block < tmp_dev->offset) | |
3690 | - printk ("Block %ld out of bounds on dev %s size %d offset %d\n", | |
3691 | - block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset); | |
3692 | + if (block >= (tmp_dev->size + tmp_dev->offset) | |
3693 | + || block < tmp_dev->offset) | |
3694 | + printk ("Block %ld out of bounds on dev %s size %d offset %d\n", | |
3695 | + block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset); | |
3696 | ||
3697 | - *rdev=tmp_dev->dev; | |
3698 | - *rsector=(block-(tmp_dev->offset)) << 1; | |
3699 | + *rdev = tmp_dev->dev; | |
3700 | + *rsector = (block - tmp_dev->offset) << 1; | |
3701 | ||
3702 | - return (0); | |
3703 | + return 0; | |
3704 | } | |
3705 | ||
3706 | -static int linear_status (char *page, int minor, struct md_dev *mddev) | |
3707 | +static int linear_status (char *page, mddev_t *mddev) | |
3708 | { | |
3709 | - int sz=0; | |
3710 | + int sz=0; | |
3711 | ||
3712 | #undef MD_DEBUG | |
3713 | #ifdef MD_DEBUG | |
3714 | - int j; | |
3715 | - struct linear_data *data=(struct linear_data *) mddev->private; | |
3716 | + int j; | |
3717 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
3718 | ||
3719 | - sz+=sprintf (page+sz, " "); | |
3720 | - for (j=0; j<data->nr_zones; j++) | |
3721 | - { | |
3722 | - sz+=sprintf (page+sz, "[%s", | |
3723 | - partition_name (data->hash_table[j].dev0->dev)); | |
3724 | - | |
3725 | - if (data->hash_table[j].dev1) | |
3726 | - sz+=sprintf (page+sz, "/%s] ", | |
3727 | - partition_name(data->hash_table[j].dev1->dev)); | |
3728 | - else | |
3729 | - sz+=sprintf (page+sz, "] "); | |
3730 | - } | |
3731 | - | |
3732 | - sz+=sprintf (page+sz, "\n"); | |
3733 | + sz += sprintf(page+sz, " "); | |
3734 | + for (j = 0; j < conf->nr_zones; j++) | |
3735 | + { | |
3736 | + sz += sprintf(page+sz, "[%s", | |
3737 | + partition_name(conf->hash_table[j].dev0->dev)); | |
3738 | + | |
3739 | + if (conf->hash_table[j].dev1) | |
3740 | + sz += sprintf(page+sz, "/%s] ", | |
3741 | + partition_name(conf->hash_table[j].dev1->dev)); | |
3742 | + else | |
3743 | + sz += sprintf(page+sz, "] "); | |
3744 | + } | |
3745 | + sz += sprintf(page+sz, "\n"); | |
3746 | #endif | |
3747 | - sz+=sprintf (page+sz, " %dk rounding", 1<<FACTOR_SHIFT(FACTOR(mddev))); | |
3748 | - return sz; | |
3749 | + sz += sprintf(page+sz, " %dk rounding", mddev->param.chunk_size/1024); | |
3750 | + return sz; | |
3751 | } | |
3752 | ||
3753 | ||
3754 | -static struct md_personality linear_personality= | |
3755 | +static mdk_personality_t linear_personality= | |
3756 | { | |
3757 | - "linear", | |
3758 | - linear_map, | |
3759 | - NULL, | |
3760 | - NULL, | |
3761 | - linear_run, | |
3762 | - linear_stop, | |
3763 | - linear_status, | |
3764 | - NULL, /* no ioctls */ | |
3765 | - 0 | |
3766 | + "linear", | |
3767 | + linear_map, | |
3768 | + NULL, | |
3769 | + NULL, | |
3770 | + linear_run, | |
3771 | + linear_stop, | |
3772 | + linear_status, | |
3773 | + NULL, | |
3774 | + 0, | |
3775 | + NULL, | |
3776 | + NULL, | |
3777 | + NULL, | |
3778 | + NULL | |
3779 | }; | |
3780 | ||
3781 | - | |
3782 | #ifndef MODULE | |
3783 | ||
3784 | -__initfunc(void linear_init (void)) | |
3785 | +md__initfunc(void linear_init (void)) | |
3786 | { | |
3787 | - register_md_personality (LINEAR, &linear_personality); | |
3788 | + register_md_personality (LINEAR, &linear_personality); | |
3789 | } | |
3790 | ||
3791 | #else | |
3792 | ||
3793 | int init_module (void) | |
3794 | { | |
3795 | - return (register_md_personality (LINEAR, &linear_personality)); | |
3796 | + return (register_md_personality (LINEAR, &linear_personality)); | |
3797 | } | |
3798 | ||
3799 | void cleanup_module (void) | |
3800 | { | |
3801 | - unregister_md_personality (LINEAR); | |
3802 | + unregister_md_personality (LINEAR); | |
3803 | } | |
3804 | ||
3805 | #endif | |
3806 | + | |
3807 | --- linux/drivers/block/linear.h.orig Fri Nov 22 15:07:23 1996 | |
3808 | +++ linux/drivers/block/linear.h Fri Nov 23 11:18:18 2001 | |
3809 | @@ -1,16 +0,0 @@ | |
3810 | -#ifndef _LINEAR_H | |
3811 | -#define _LINEAR_H | |
3812 | - | |
3813 | -struct linear_hash | |
3814 | -{ | |
3815 | - struct real_dev *dev0, *dev1; | |
3816 | -}; | |
3817 | - | |
3818 | -struct linear_data | |
3819 | -{ | |
3820 | - struct linear_hash *hash_table; /* Dynamically allocated */ | |
3821 | - struct real_dev *smallest; | |
3822 | - int nr_zones; | |
3823 | -}; | |
3824 | - | |
3825 | -#endif | |
3826 | --- linux/drivers/block/ll_rw_blk.c.orig Fri Nov 23 11:17:44 2001 | |
3827 | +++ linux/drivers/block/ll_rw_blk.c Fri Nov 23 11:18:18 2001 | |
3828 | @@ -23,6 +23,7 @@ | |
3829 | #include <asm/io.h> | |
3830 | #include <asm/uaccess.h> | |
3831 | #include <linux/blk.h> | |
3832 | +#include <linux/raid/md.h> | |
3833 | ||
3834 | #ifdef CONFIG_POWERMAC | |
3835 | #include <asm/ide.h> | |
3836 | @@ -57,6 +58,11 @@ | |
3837 | spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; | |
3838 | ||
3839 | /* | |
3840 | + * per-major idle-IO detection | |
3841 | + */ | |
3842 | +unsigned long io_events[MAX_BLKDEV] = {0, }; | |
3843 | + | |
3844 | +/* | |
3845 | * used to wait on when there are no free requests | |
3846 | */ | |
3847 | struct wait_queue * wait_for_request; | |
3848 | @@ -587,6 +593,8 @@ | |
3849 | return; | |
3850 | /* Maybe the above fixes it, and maybe it doesn't boot. Life is interesting */ | |
3851 | lock_buffer(bh); | |
3852 | + if (!buffer_lowprio(bh)) | |
3853 | + io_events[major]++; | |
3854 | ||
3855 | if (blk_size[major]) { | |
3856 | unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; | |
3857 | @@ -836,7 +844,7 @@ | |
3858 | bh[i]->b_rsector=bh[i]->b_blocknr*(bh[i]->b_size >> 9); | |
3859 | #ifdef CONFIG_BLK_DEV_MD | |
3860 | if (major==MD_MAJOR && | |
3861 | - md_map (MINOR(bh[i]->b_dev), &bh[i]->b_rdev, | |
3862 | + md_map (bh[i]->b_dev, &bh[i]->b_rdev, | |
3863 | &bh[i]->b_rsector, bh[i]->b_size >> 9)) { | |
3864 | printk (KERN_ERR | |
3865 | "Bad md_map in ll_rw_block\n"); | |
3866 | @@ -856,7 +864,7 @@ | |
3867 | set_bit(BH_Req, &bh[i]->b_state); | |
3868 | #ifdef CONFIG_BLK_DEV_MD | |
3869 | if (MAJOR(bh[i]->b_dev) == MD_MAJOR) { | |
3870 | - md_make_request(MINOR (bh[i]->b_dev), rw, bh[i]); | |
3871 | + md_make_request(bh[i], rw); | |
3872 | continue; | |
3873 | } | |
3874 | #endif | |
3875 | --- linux/drivers/block/md.c.orig Mon Sep 4 19:39:16 2000 | |
3876 | +++ linux/drivers/block/md.c Fri Nov 23 11:18:18 2001 | |
3877 | @@ -1,21 +1,17 @@ | |
3878 | - | |
3879 | /* | |
3880 | md.c : Multiple Devices driver for Linux | |
3881 | - Copyright (C) 1994-96 Marc ZYNGIER | |
3882 | - <zyngier@ufr-info-p7.ibp.fr> or | |
3883 | - <maz@gloups.fdn.fr> | |
3884 | + Copyright (C) 1998, 1999 Ingo Molnar | |
3885 | ||
3886 | - A lot of inspiration came from hd.c ... | |
3887 | + completely rewritten, based on the MD driver code from Marc Zyngier | |
3888 | ||
3889 | - kerneld support by Boris Tobotras <boris@xtalk.msk.su> | |
3890 | - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> | |
3891 | + Changes: | |
3892 | ||
3893 | - RAID-1/RAID-5 extensions by: | |
3894 | - Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
3895 | + - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar | |
3896 | + - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> | |
3897 | + - kerneld support by Boris Tobotras <boris@xtalk.msk.su> | |
3898 | + - kmod support by: Cyrus Durgin | |
3899 | + - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> | |
3900 | ||
3901 | - Changes for kmod by: | |
3902 | - Cyrus Durgin | |
3903 | - | |
3904 | This program is free software; you can redistribute it and/or modify | |
3905 | it under the terms of the GNU General Public License as published by | |
3906 | the Free Software Foundation; either version 2, or (at your option) | |
3907 | @@ -26,807 +22,3007 @@ | |
3908 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
3909 | */ | |
3910 | ||
3911 | -/* | |
3912 | - * Current RAID-1,4,5 parallel reconstruction speed limit is 1024 KB/sec, so | |
3913 | - * the extra system load does not show up that much. Increase it if your | |
3914 | - * system can take more. | |
3915 | - */ | |
3916 | -#define SPEED_LIMIT 1024 | |
3917 | +#include <linux/raid/md.h> | |
3918 | +#include <linux/raid/xor.h> | |
3919 | ||
3920 | -#include <linux/config.h> | |
3921 | -#include <linux/module.h> | |
3922 | -#include <linux/version.h> | |
3923 | -#include <linux/malloc.h> | |
3924 | -#include <linux/mm.h> | |
3925 | -#include <linux/md.h> | |
3926 | -#include <linux/hdreg.h> | |
3927 | -#include <linux/stat.h> | |
3928 | -#include <linux/fs.h> | |
3929 | -#include <linux/proc_fs.h> | |
3930 | -#include <linux/blkdev.h> | |
3931 | -#include <linux/genhd.h> | |
3932 | -#include <linux/smp_lock.h> | |
3933 | #ifdef CONFIG_KMOD | |
3934 | #include <linux/kmod.h> | |
3935 | #endif | |
3936 | -#include <linux/errno.h> | |
3937 | -#include <linux/init.h> | |
3938 | ||
3939 | #define __KERNEL_SYSCALLS__ | |
3940 | #include <linux/unistd.h> | |
3941 | ||
3942 | +#include <asm/unaligned.h> | |
3943 | + | |
3944 | +extern asmlinkage int sys_sched_yield(void); | |
3945 | +extern asmlinkage int sys_setsid(void); | |
3946 | + | |
3947 | +extern unsigned long io_events[MAX_BLKDEV]; | |
3948 | + | |
3949 | #define MAJOR_NR MD_MAJOR | |
3950 | #define MD_DRIVER | |
3951 | ||
3952 | #include <linux/blk.h> | |
3953 | -#include <asm/uaccess.h> | |
3954 | -#include <asm/bitops.h> | |
3955 | -#include <asm/atomic.h> | |
3956 | ||
3957 | #ifdef CONFIG_MD_BOOT | |
3958 | -extern kdev_t name_to_kdev_t(char *line) __init; | |
3959 | +extern kdev_t name_to_kdev_t(char *line) md__init; | |
3960 | #endif | |
3961 | ||
3962 | -static struct hd_struct md_hd_struct[MAX_MD_DEV]; | |
3963 | -static int md_blocksizes[MAX_MD_DEV]; | |
3964 | -int md_maxreadahead[MAX_MD_DEV]; | |
3965 | -#if SUPPORT_RECONSTRUCTION | |
3966 | -static struct md_thread *md_sync_thread = NULL; | |
3967 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
3968 | +static mdk_personality_t *pers[MAX_PERSONALITY] = {NULL, }; | |
3969 | + | |
3970 | +/* | |
3971 | + * these have to be allocated separately because external | |
3972 | + * subsystems want to have a pre-defined structure | |
3973 | + */ | |
3974 | +struct hd_struct md_hd_struct[MAX_MD_DEVS]; | |
3975 | +static int md_blocksizes[MAX_MD_DEVS]; | |
3976 | +static int md_maxreadahead[MAX_MD_DEVS]; | |
3977 | +static mdk_thread_t *md_recovery_thread = NULL; | |
3978 | ||
3979 | -int md_size[MAX_MD_DEV]={0, }; | |
3980 | +int md_size[MAX_MD_DEVS] = {0, }; | |
3981 | ||
3982 | static void md_geninit (struct gendisk *); | |
3983 | ||
3984 | static struct gendisk md_gendisk= | |
3985 | { | |
3986 | - MD_MAJOR, | |
3987 | - "md", | |
3988 | - 0, | |
3989 | - 1, | |
3990 | - MAX_MD_DEV, | |
3991 | - md_geninit, | |
3992 | - md_hd_struct, | |
3993 | - md_size, | |
3994 | - MAX_MD_DEV, | |
3995 | - NULL, | |
3996 | - NULL | |
3997 | + MD_MAJOR, | |
3998 | + "md", | |
3999 | + 0, | |
4000 | + 1, | |
4001 | + MAX_MD_DEVS, | |
4002 | + md_geninit, | |
4003 | + md_hd_struct, | |
4004 | + md_size, | |
4005 | + MAX_MD_DEVS, | |
4006 | + NULL, | |
4007 | + NULL | |
4008 | }; | |
4009 | ||
4010 | -static struct md_personality *pers[MAX_PERSONALITY]={NULL, }; | |
4011 | -struct md_dev md_dev[MAX_MD_DEV]; | |
4012 | - | |
4013 | -int md_thread(void * arg); | |
4014 | +/* | |
4015 | + * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | |
4016 | + * is 100 KB/sec, so the extra system load does not show up that much. | |
4017 | + * Increase it if you want to have more _guaranteed_ speed. Note that | |
4018 | + * the RAID driver will use the maximum available bandwidth if the IO | 
4019 | + * subsystem is idle. | |
4020 | + * | |
4021 | + * you can change it via /proc/sys/dev/speed-limit | |
4022 | + */ | |
4023 | ||
4024 | -static struct gendisk *find_gendisk (kdev_t dev) | |
4025 | -{ | |
4026 | - struct gendisk *tmp=gendisk_head; | |
4027 | +static int sysctl_speed_limit = 100; | |
4028 | ||
4029 | - while (tmp != NULL) | |
4030 | - { | |
4031 | - if (tmp->major==MAJOR(dev)) | |
4032 | - return (tmp); | |
4033 | - | |
4034 | - tmp=tmp->next; | |
4035 | - } | |
4036 | +static struct ctl_table_header *md_table_header; | |
4037 | ||
4038 | - return (NULL); | |
4039 | -} | |
4040 | +static ctl_table md_table[] = { | |
4041 | + {DEV_MD_SPEED_LIMIT, "speed-limit", | |
4042 | + &sysctl_speed_limit, sizeof(int), 0644, NULL, &proc_dointvec}, | |
4043 | + {0} | |
4044 | +}; | |
4045 | ||
4046 | -char *partition_name (kdev_t dev) | |
4047 | -{ | |
4048 | - static char name[40]; /* This should be long | |
4049 | - enough for a device name ! */ | |
4050 | - struct gendisk *hd = find_gendisk (dev); | |
4051 | +static ctl_table md_dir_table[] = { | |
4052 | + {DEV_MD, "md", NULL, 0, 0555, md_table}, | |
4053 | + {0} | |
4054 | +}; | |
4055 | ||
4056 | - if (!hd) | |
4057 | - { | |
4058 | - sprintf (name, "[dev %s]", kdevname(dev)); | |
4059 | - return (name); | |
4060 | - } | |
4061 | +static ctl_table md_root_table[] = { | |
4062 | + {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table}, | |
4063 | + {0} | |
4064 | +}; | |
4065 | ||
4066 | - return disk_name (hd, MINOR(dev), name); /* routine in genhd.c */ | |
4067 | +static void md_register_sysctl(void) | |
4068 | +{ | |
4069 | + md_table_header = register_sysctl_table(md_root_table, 1); | |
4070 | } | |
4071 | ||
4072 | -static int legacy_raid_sb (int minor, int pnum) | |
4073 | +void md_unregister_sysctl(void) | |
4074 | { | |
4075 | - int i, factor; | |
4076 | + unregister_sysctl_table(md_table_header); | |
4077 | +} | |
4078 | + | |
4079 | +/* | |
4080 | + * The mapping between kdev and mddev is not necessarily a simple | 
4081 | + * one! Eg. HSM uses several sub-devices to implement Logical | |
4082 | + * Volumes. All these sub-devices map to the same mddev. | |
4083 | + */ | |
4084 | +dev_mapping_t mddev_map [MAX_MD_DEVS] = { {NULL, 0}, }; | |
4085 | ||
4086 | - factor = 1 << FACTOR_SHIFT(FACTOR((md_dev+minor))); | |
4087 | +void add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data) | |
4088 | +{ | |
4089 | + unsigned int minor = MINOR(dev); | |
4090 | ||
4091 | - /***** | |
4092 | - * do size and offset calculations. | |
4093 | - */ | |
4094 | - for (i=0; i<md_dev[minor].nb_dev; i++) { | |
4095 | - md_dev[minor].devices[i].size &= ~(factor - 1); | |
4096 | - md_size[minor] += md_dev[minor].devices[i].size; | |
4097 | - md_dev[minor].devices[i].offset=i ? (md_dev[minor].devices[i-1].offset + | |
4098 | - md_dev[minor].devices[i-1].size) : 0; | |
4099 | + if (MAJOR(dev) != MD_MAJOR) { | |
4100 | + MD_BUG(); | |
4101 | + return; | |
4102 | } | |
4103 | - if (pnum == RAID0 >> PERSONALITY_SHIFT) | |
4104 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD * md_dev[minor].nb_dev; | |
4105 | - return 0; | |
4106 | + if (mddev_map[minor].mddev != NULL) { | |
4107 | + MD_BUG(); | |
4108 | + return; | |
4109 | + } | |
4110 | + mddev_map[minor].mddev = mddev; | |
4111 | + mddev_map[minor].data = data; | |
4112 | } | |
4113 | ||
4114 | -static void free_sb (struct md_dev *mddev) | |
4115 | +void del_mddev_mapping (mddev_t * mddev, kdev_t dev) | |
4116 | { | |
4117 | - int i; | |
4118 | - struct real_dev *realdev; | |
4119 | + unsigned int minor = MINOR(dev); | |
4120 | ||
4121 | - if (mddev->sb) { | |
4122 | - free_page((unsigned long) mddev->sb); | |
4123 | - mddev->sb = NULL; | |
4124 | + if (MAJOR(dev) != MD_MAJOR) { | |
4125 | + MD_BUG(); | |
4126 | + return; | |
4127 | } | |
4128 | - for (i = 0; i <mddev->nb_dev; i++) { | |
4129 | - realdev = mddev->devices + i; | |
4130 | - if (realdev->sb) { | |
4131 | - free_page((unsigned long) realdev->sb); | |
4132 | - realdev->sb = NULL; | |
4133 | - } | |
4134 | + if (mddev_map[minor].mddev != mddev) { | |
4135 | + MD_BUG(); | |
4136 | + return; | |
4137 | } | |
4138 | + mddev_map[minor].mddev = NULL; | |
4139 | + mddev_map[minor].data = NULL; | |
4140 | } | |
4141 | ||
4142 | /* | |
4143 | - * Check one RAID superblock for generic plausibility | |
4144 | + * Allows iterating over all existing md arrays | 
4145 | */ | |
4146 | +static MD_LIST_HEAD(all_mddevs); | |
4147 | ||
4148 | -#define BAD_MAGIC KERN_ERR \ | |
4149 | -"md: %s: invalid raid superblock magic (%x) on block %u\n" | |
4150 | +static mddev_t * alloc_mddev (kdev_t dev) | |
4151 | +{ | |
4152 | + mddev_t * mddev; | |
4153 | ||
4154 | -#define OUT_OF_MEM KERN_ALERT \ | |
4155 | -"md: out of memory.\n" | |
4156 | + if (MAJOR(dev) != MD_MAJOR) { | |
4157 | + MD_BUG(); | |
4158 | + return 0; | |
4159 | + } | |
4160 | + mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); | |
4161 | + if (!mddev) | |
4162 | + return NULL; | |
4163 | + | |
4164 | + memset(mddev, 0, sizeof(*mddev)); | |
4165 | ||
4166 | -#define NO_DEVICE KERN_ERR \ | |
4167 | -"md: disabled device %s\n" | |
4168 | + mddev->__minor = MINOR(dev); | |
4169 | + mddev->reconfig_sem = MUTEX; | |
4170 | + mddev->recovery_sem = MUTEX; | |
4171 | + mddev->resync_sem = MUTEX; | |
4172 | + MD_INIT_LIST_HEAD(&mddev->disks); | |
4173 | + /* | |
4174 | + * The 'base' mddev is the one with data NULL. | |
4175 | + * personalities can create additional mddevs | |
4176 | + * if necessary. | |
4177 | + */ | |
4178 | + add_mddev_mapping(mddev, dev, 0); | |
4179 | + md_list_add(&mddev->all_mddevs, &all_mddevs); | |
4180 | ||
4181 | -#define SUCCESS 0 | |
4182 | -#define FAILURE -1 | |
4183 | + return mddev; | |
4184 | +} | |
4185 | ||
4186 | -static int analyze_one_sb (struct real_dev * rdev) | |
4187 | +static void free_mddev (mddev_t *mddev) | |
4188 | { | |
4189 | - int ret = FAILURE; | |
4190 | - struct buffer_head *bh; | |
4191 | - kdev_t dev = rdev->dev; | |
4192 | - md_superblock_t *sb; | |
4193 | + if (!mddev) { | |
4194 | + MD_BUG(); | |
4195 | + return; | |
4196 | + } | |
4197 | ||
4198 | /* | |
4199 | - * Read the superblock, it's at the end of the disk | |
4200 | + * Make sure nobody else is using this mddev | |
4201 | + * (careful, we rely on the global kernel lock here) | |
4202 | */ | |
4203 | - rdev->sb_offset = MD_NEW_SIZE_BLOCKS (blk_size[MAJOR(dev)][MINOR(dev)]); | |
4204 | - set_blocksize (dev, MD_SB_BYTES); | |
4205 | - bh = bread (dev, rdev->sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
4206 | - | |
4207 | - if (bh) { | |
4208 | - sb = (md_superblock_t *) bh->b_data; | |
4209 | - if (sb->md_magic != MD_SB_MAGIC) { | |
4210 | - printk (BAD_MAGIC, kdevname(dev), | |
4211 | - sb->md_magic, rdev->sb_offset); | |
4212 | - goto abort; | |
4213 | - } | |
4214 | - rdev->sb = (md_superblock_t *) __get_free_page(GFP_KERNEL); | |
4215 | - if (!rdev->sb) { | |
4216 | - printk (OUT_OF_MEM); | |
4217 | - goto abort; | |
4218 | - } | |
4219 | - memcpy (rdev->sb, bh->b_data, MD_SB_BYTES); | |
4220 | + while (md_atomic_read(&mddev->resync_sem.count) != 1) | |
4221 | + schedule(); | |
4222 | + while (md_atomic_read(&mddev->recovery_sem.count) != 1) | |
4223 | + schedule(); | |
4224 | ||
4225 | - rdev->size = sb->size; | |
4226 | - } else | |
4227 | - printk (NO_DEVICE,kdevname(rdev->dev)); | |
4228 | - ret = SUCCESS; | |
4229 | -abort: | |
4230 | - if (bh) | |
4231 | - brelse (bh); | |
4232 | - return ret; | |
4233 | + del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev))); | |
4234 | + md_list_del(&mddev->all_mddevs); | |
4235 | + MD_INIT_LIST_HEAD(&mddev->all_mddevs); | |
4236 | + kfree(mddev); | |
4237 | } | |
4238 | ||
4239 | -#undef SUCCESS | |
4240 | -#undef FAILURE | |
4241 | - | |
4242 | -#undef BAD_MAGIC | |
4243 | -#undef OUT_OF_MEM | |
4244 | -#undef NO_DEVICE | |
4245 | ||
4246 | -/* | |
4247 | - * Check a full RAID array for plausibility | |
4248 | - */ | |
4249 | +struct gendisk * find_gendisk (kdev_t dev) | |
4250 | +{ | |
4251 | + struct gendisk *tmp = gendisk_head; | |
4252 | ||
4253 | -#define INCONSISTENT KERN_ERR \ | |
4254 | -"md: superblock inconsistency -- run ckraid\n" | |
4255 | + while (tmp != NULL) { | |
4256 | + if (tmp->major == MAJOR(dev)) | |
4257 | + return (tmp); | |
4258 | + tmp = tmp->next; | |
4259 | + } | |
4260 | + return (NULL); | |
4261 | +} | |
4262 | ||
4263 | -#define OUT_OF_DATE KERN_ERR \ | |
4264 | -"md: superblock update time inconsistenty -- using the most recent one\n" | |
4265 | +mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) | |
4266 | +{ | |
4267 | + mdk_rdev_t * rdev; | |
4268 | + struct md_list_head *tmp; | |
4269 | ||
4270 | -#define OLD_VERSION KERN_ALERT \ | |
4271 | -"md: %s: unsupported raid array version %d.%d.%d\n" | |
4272 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4273 | + if (rdev->desc_nr == nr) | |
4274 | + return rdev; | |
4275 | + } | |
4276 | + return NULL; | |
4277 | +} | |
4278 | ||
4279 | -#define NOT_CLEAN KERN_ERR \ | |
4280 | -"md: %s: raid array is not clean -- run ckraid\n" | |
4281 | +mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev) | |
4282 | +{ | |
4283 | + struct md_list_head *tmp; | |
4284 | + mdk_rdev_t *rdev; | |
4285 | ||
4286 | -#define NOT_CLEAN_IGNORE KERN_ERR \ | |
4287 | -"md: %s: raid array is not clean -- reconstructing parity\n" | |
4288 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4289 | + if (rdev->dev == dev) | |
4290 | + return rdev; | |
4291 | + } | |
4292 | + return NULL; | |
4293 | +} | |
4294 | ||
4295 | -#define UNKNOWN_LEVEL KERN_ERR \ | |
4296 | -"md: %s: unsupported raid level %d\n" | |
4297 | +static MD_LIST_HEAD(device_names); | |
4298 | ||
4299 | -static int analyze_sbs (int minor, int pnum) | |
4300 | +char * partition_name (kdev_t dev) | |
4301 | { | |
4302 | - struct md_dev *mddev = md_dev + minor; | |
4303 | - int i, N = mddev->nb_dev, out_of_date = 0; | |
4304 | - struct real_dev * disks = mddev->devices; | |
4305 | - md_superblock_t *sb, *freshest = NULL; | |
4306 | + struct gendisk *hd; | |
4307 | + static char nomem [] = "<nomem>"; | |
4308 | + dev_name_t *dname; | |
4309 | + struct md_list_head *tmp = device_names.next; | |
4310 | ||
4311 | - /* | |
4312 | - * RAID-0 and linear don't use a RAID superblock | |
4313 | - */ | |
4314 | - if (pnum == RAID0 >> PERSONALITY_SHIFT || | |
4315 | - pnum == LINEAR >> PERSONALITY_SHIFT) | |
4316 | - return legacy_raid_sb (minor, pnum); | |
4317 | + while (tmp != &device_names) { | |
4318 | + dname = md_list_entry(tmp, dev_name_t, list); | |
4319 | + if (dname->dev == dev) | |
4320 | + return dname->name; | |
4321 | + tmp = tmp->next; | |
4322 | + } | |
4323 | + | |
4324 | + dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); | |
4325 | ||
4326 | + if (!dname) | |
4327 | + return nomem; | |
4328 | /* | |
4329 | - * Verify the RAID superblock on each real device | |
4330 | + * ok, add this new device name to the list | |
4331 | */ | |
4332 | - for (i = 0; i < N; i++) | |
4333 | - if (analyze_one_sb(disks+i)) | |
4334 | - goto abort; | |
4335 | + hd = find_gendisk (dev); | |
4336 | + | |
4337 | + if (!hd) | |
4338 | + sprintf (dname->name, "[dev %s]", kdevname(dev)); | |
4339 | + else | |
4340 | + disk_name (hd, MINOR(dev), dname->name); | |
4341 | + | |
4342 | + dname->dev = dev; | |
4343 | + md_list_add(&dname->list, &device_names); | |
4344 | + | |
4345 | + return dname->name; | |
4346 | +} | |
4347 | + | |
4348 | +static unsigned int calc_dev_sboffset (kdev_t dev, mddev_t *mddev, | |
4349 | + int persistent) | |
4350 | +{ | |
4351 | + unsigned int size = 0; | |
4352 | + | |
4353 | + if (blk_size[MAJOR(dev)]) | |
4354 | + size = blk_size[MAJOR(dev)][MINOR(dev)]; | |
4355 | + if (persistent) | |
4356 | + size = MD_NEW_SIZE_BLOCKS(size); | |
4357 | + return size; | |
4358 | +} | |
4359 | + | |
4360 | +static unsigned int calc_dev_size (kdev_t dev, mddev_t *mddev, int persistent) | |
4361 | +{ | |
4362 | + unsigned int size; | |
4363 | + | |
4364 | + size = calc_dev_sboffset(dev, mddev, persistent); | |
4365 | + if (!mddev->sb) { | |
4366 | + MD_BUG(); | |
4367 | + return size; | |
4368 | + } | |
4369 | + if (mddev->sb->chunk_size) | |
4370 | + size &= ~(mddev->sb->chunk_size/1024 - 1); | |
4371 | + return size; | |
4372 | +} | |
4373 | + | |
4374 | +/* | |
4375 | + * We check whether all devices are numbered from 0 to nb_dev-1. The | 
4376 | + * order is guaranteed even after device name changes. | |
4377 | + * | |
4378 | + * Some personalities (raid0, linear) use this. Personalities that | |
4379 | + * provide data have to be able to deal with loss of individual | |
4380 | + * disks, so they do their checking themselves. | |
4381 | + */ | |
4382 | +int md_check_ordering (mddev_t *mddev) | |
4383 | +{ | |
4384 | + int i, c; | |
4385 | + mdk_rdev_t *rdev; | |
4386 | + struct md_list_head *tmp; | |
4387 | ||
4388 | /* | |
4389 | - * The superblock constant part has to be the same | |
4390 | - * for all disks in the array. | |
4391 | + * First, all devices must be fully functional | |
4392 | */ | |
4393 | - sb = NULL; | |
4394 | - for (i = 0; i < N; i++) { | |
4395 | - if (!disks[i].sb) | |
4396 | - continue; | |
4397 | - if (!sb) { | |
4398 | - sb = disks[i].sb; | |
4399 | - continue; | |
4400 | - } | |
4401 | - if (memcmp(sb, | |
4402 | - disks[i].sb, MD_SB_GENERIC_CONSTANT_WORDS * 4)) { | |
4403 | - printk (INCONSISTENT); | |
4404 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4405 | + if (rdev->faulty) { | |
4406 | + printk("md: md%d's device %s faulty, aborting.\n", | |
4407 | + mdidx(mddev), partition_name(rdev->dev)); | |
4408 | goto abort; | |
4409 | } | |
4410 | } | |
4411 | ||
4412 | - /* | |
4413 | - * OK, we have all disks and the array is ready to run. Let's | |
4414 | - * find the freshest superblock, that one will be the superblock | |
4415 | - * that represents the whole array. | |
4416 | - */ | |
4417 | - if ((sb = mddev->sb = (md_superblock_t *) __get_free_page (GFP_KERNEL)) == NULL) | |
4418 | + c = 0; | |
4419 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4420 | + c++; | |
4421 | + } | |
4422 | + if (c != mddev->nb_dev) { | |
4423 | + MD_BUG(); | |
4424 | goto abort; | |
4425 | - freshest = NULL; | |
4426 | - for (i = 0; i < N; i++) { | |
4427 | - if (!disks[i].sb) | |
4428 | - continue; | |
4429 | - if (!freshest) { | |
4430 | - freshest = disks[i].sb; | |
4431 | - continue; | |
4432 | - } | |
4433 | - /* | |
4434 | - * Find the newest superblock version | |
4435 | - */ | |
4436 | - if (disks[i].sb->utime != freshest->utime) { | |
4437 | - out_of_date = 1; | |
4438 | - if (disks[i].sb->utime > freshest->utime) | |
4439 | - freshest = disks[i].sb; | |
4440 | - } | |
4441 | } | |
4442 | - if (out_of_date) | |
4443 | - printk(OUT_OF_DATE); | |
4444 | - memcpy (sb, freshest, sizeof(*freshest)); | |
4445 | - | |
4446 | - /* | |
4447 | - * Check if we can support this RAID array | |
4448 | - */ | |
4449 | - if (sb->major_version != MD_MAJOR_VERSION || | |
4450 | - sb->minor_version > MD_MINOR_VERSION) { | |
4451 | - | |
4452 | - printk (OLD_VERSION, kdevname(MKDEV(MD_MAJOR, minor)), | |
4453 | - sb->major_version, sb->minor_version, | |
4454 | - sb->patch_version); | |
4455 | + if (mddev->nb_dev != mddev->sb->raid_disks) { | |
4456 | + printk("md: md%d, array needs %d disks, has %d, aborting.\n", | |
4457 | + mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev); | |
4458 | goto abort; | |
4459 | } | |
4460 | - | |
4461 | /* | |
4462 | - * We need to add this as a superblock option. | |
4463 | + * Now the numbering check | |
4464 | */ | |
4465 | -#if SUPPORT_RECONSTRUCTION | |
4466 | - if (sb->state != (1 << MD_SB_CLEAN)) { | |
4467 | - if (sb->level == 1) { | |
4468 | - printk (NOT_CLEAN, kdevname(MKDEV(MD_MAJOR, minor))); | |
4469 | + for (i = 0; i < mddev->nb_dev; i++) { | |
4470 | + c = 0; | |
4471 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4472 | + if (rdev->desc_nr == i) | |
4473 | + c++; | |
4474 | + } | |
4475 | + if (c == 0) { | |
4476 | + printk("md: md%d, missing disk #%d, aborting.\n", | |
4477 | + mdidx(mddev), i); | |
4478 | goto abort; | |
4479 | - } else | |
4480 | - printk (NOT_CLEAN_IGNORE, kdevname(MKDEV(MD_MAJOR, minor))); | |
4481 | - } | |
4482 | -#else | |
4483 | - if (sb->state != (1 << MD_SB_CLEAN)) { | |
4484 | - printk (NOT_CLEAN, kdevname(MKDEV(MD_MAJOR, minor))); | |
4485 | - goto abort; | |
4486 | - } | |
4487 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
4488 | - | |
4489 | - switch (sb->level) { | |
4490 | - case 1: | |
4491 | - md_size[minor] = sb->size; | |
4492 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD; | |
4493 | - break; | |
4494 | - case 4: | |
4495 | - case 5: | |
4496 | - md_size[minor] = sb->size * (sb->raid_disks - 1); | |
4497 | - md_maxreadahead[minor] = MD_DEFAULT_DISK_READAHEAD * (sb->raid_disks - 1); | |
4498 | - break; | |
4499 | - default: | |
4500 | - printk (UNKNOWN_LEVEL, kdevname(MKDEV(MD_MAJOR, minor)), | |
4501 | - sb->level); | |
4502 | + } | |
4503 | + if (c > 1) { | |
4504 | + printk("md: md%d, too many disks #%d, aborting.\n", | |
4505 | + mdidx(mddev), i); | |
4506 | goto abort; | |
4507 | + } | |
4508 | } | |
4509 | return 0; | |
4510 | abort: | |
4511 | - free_sb(mddev); | |
4512 | return 1; | |
4513 | } | |
4514 | ||
4515 | -#undef INCONSISTENT | |
4516 | -#undef OUT_OF_DATE | |
4517 | -#undef OLD_VERSION | |
4518 | -#undef NOT_CLEAN | |
4519 | -#undef OLD_LEVEL | |
4520 | - | |
4521 | -int md_update_sb(int minor) | |
4522 | +static unsigned int zoned_raid_size (mddev_t *mddev) | |
4523 | { | |
4524 | - struct md_dev *mddev = md_dev + minor; | |
4525 | - struct buffer_head *bh; | |
4526 | - md_superblock_t *sb = mddev->sb; | |
4527 | - struct real_dev *realdev; | |
4528 | - kdev_t dev; | |
4529 | - int i; | |
4530 | - u32 sb_offset; | |
4531 | + unsigned int mask; | |
4532 | + mdk_rdev_t * rdev; | |
4533 | + struct md_list_head *tmp; | |
4534 | ||
4535 | - sb->utime = CURRENT_TIME; | |
4536 | - for (i = 0; i < mddev->nb_dev; i++) { | |
4537 | - realdev = mddev->devices + i; | |
4538 | - if (!realdev->sb) | |
4539 | - continue; | |
4540 | - dev = realdev->dev; | |
4541 | - sb_offset = realdev->sb_offset; | |
4542 | - set_blocksize(dev, MD_SB_BYTES); | |
4543 | - printk("md: updating raid superblock on device %s, sb_offset == %u\n", kdevname(dev), sb_offset); | |
4544 | - bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
4545 | - if (bh) { | |
4546 | - sb = (md_superblock_t *) bh->b_data; | |
4547 | - memcpy(sb, mddev->sb, MD_SB_BYTES); | |
4548 | - memcpy(&sb->descriptor, sb->disks + realdev->sb->descriptor.number, MD_SB_DESCRIPTOR_WORDS * 4); | |
4549 | - mark_buffer_uptodate(bh, 1); | |
4550 | - mark_buffer_dirty(bh, 1); | |
4551 | - ll_rw_block(WRITE, 1, &bh); | |
4552 | - wait_on_buffer(bh); | |
4553 | - bforget(bh); | |
4554 | - fsync_dev(dev); | |
4555 | - invalidate_buffers(dev); | |
4556 | - } else | |
4557 | - printk(KERN_ERR "md: getblk failed for device %s\n", kdevname(dev)); | |
4558 | + if (!mddev->sb) { | |
4559 | + MD_BUG(); | |
4560 | + return -EINVAL; | |
4561 | + } | |
4562 | + /* | |
4563 | + * do size and offset calculations. | |
4564 | + */ | |
4565 | + mask = ~(mddev->sb->chunk_size/1024 - 1); | |
4566 | +printk("mask %08x\n", mask); | |
4567 | + | |
4568 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
4569 | +printk(" rdev->size: %d\n", rdev->size); | |
4570 | + rdev->size &= mask; | |
4571 | +printk(" masked rdev->size: %d\n", rdev->size); | |
4572 | + md_size[mdidx(mddev)] += rdev->size; | |
4573 | +printk(" new md_size: %d\n", md_size[mdidx(mddev)]); | |
4574 | } | |
4575 | return 0; | |
4576 | } | |
4577 | ||
4578 | -static int do_md_run (int minor, int repart) | |
4579 | +static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb) | |
4580 | { | |
4581 | - int pnum, i, min, factor, err; | |
4582 | + if (disk_active(disk)) { | |
4583 | + sb->working_disks--; | |
4584 | + } else { | |
4585 | + if (disk_spare(disk)) { | |
4586 | + sb->spare_disks--; | |
4587 | + sb->working_disks--; | |
4588 | + } else { | |
4589 | + sb->failed_disks--; | |
4590 | + } | |
4591 | + } | |
4592 | + sb->nr_disks--; | |
4593 | + disk->major = 0; | |
4594 | + disk->minor = 0; | |
4595 | + mark_disk_removed(disk); | |
4596 | +} | |
4597 | ||
4598 | - if (!md_dev[minor].nb_dev) | |
4599 | - return -EINVAL; | |
4600 | - | |
4601 | - if (md_dev[minor].pers) | |
4602 | - return -EBUSY; | |
4603 | +#define BAD_MAGIC KERN_ERR \ | |
4604 | +"md: invalid raid superblock magic on %s\n" | |
4605 | ||
4606 | - md_dev[minor].repartition=repart; | |
4607 | - | |
4608 | - if ((pnum=PERSONALITY(&md_dev[minor]) >> (PERSONALITY_SHIFT)) | |
4609 | - >= MAX_PERSONALITY) | |
4610 | - return -EINVAL; | |
4611 | - | |
4612 | - /* Only RAID-1 and RAID-5 can have MD devices as underlying devices */ | |
4613 | - if (pnum != (RAID1 >> PERSONALITY_SHIFT) && pnum != (RAID5 >> PERSONALITY_SHIFT)){ | |
4614 | - for (i = 0; i < md_dev [minor].nb_dev; i++) | |
4615 | - if (MAJOR (md_dev [minor].devices [i].dev) == MD_MAJOR) | |
4616 | - return -EINVAL; | |
4617 | - } | |
4618 | - if (!pers[pnum]) | |
4619 | - { | |
4620 | -#ifdef CONFIG_KMOD | |
4621 | - char module_name[80]; | |
4622 | - sprintf (module_name, "md-personality-%d", pnum); | |
4623 | - request_module (module_name); | |
4624 | - if (!pers[pnum]) | |
4625 | -#endif | |
4626 | - return -EINVAL; | |
4627 | - } | |
4628 | - | |
4629 | - factor = min = 1 << FACTOR_SHIFT(FACTOR((md_dev+minor))); | |
4630 | - | |
4631 | - for (i=0; i<md_dev[minor].nb_dev; i++) | |
4632 | - if (md_dev[minor].devices[i].size<min) | |
4633 | - { | |
4634 | - printk ("Dev %s smaller than %dk, cannot shrink\n", | |
4635 | - partition_name (md_dev[minor].devices[i].dev), min); | |
4636 | - return -EINVAL; | |
4637 | - } | |
4638 | - | |
4639 | - for (i=0; i<md_dev[minor].nb_dev; i++) { | |
4640 | - fsync_dev(md_dev[minor].devices[i].dev); | |
4641 | - invalidate_buffers(md_dev[minor].devices[i].dev); | |
4642 | - } | |
4643 | - | |
4644 | - /* Resize devices according to the factor. It is used to align | |
4645 | - partitions size on a given chunk size. */ | |
4646 | - md_size[minor]=0; | |
4647 | - | |
4648 | - /* | |
4649 | - * Analyze the raid superblock | |
4650 | - */ | |
4651 | - if (analyze_sbs(minor, pnum)) | |
4652 | - return -EINVAL; | |
4653 | +#define BAD_MINOR KERN_ERR \ | |
4654 | +"md: %s: invalid raid minor (%x)\n" | |
4655 | ||
4656 | - md_dev[minor].pers=pers[pnum]; | |
4657 | - | |
4658 | - if ((err=md_dev[minor].pers->run (minor, md_dev+minor))) | |
4659 | - { | |
4660 | - md_dev[minor].pers=NULL; | |
4661 | - free_sb(md_dev + minor); | |
4662 | - return (err); | |
4663 | - } | |
4664 | - | |
4665 | - if (pnum != RAID0 >> PERSONALITY_SHIFT && pnum != LINEAR >> PERSONALITY_SHIFT) | |
4666 | - { | |
4667 | - md_dev[minor].sb->state &= ~(1 << MD_SB_CLEAN); | |
4668 | - md_update_sb(minor); | |
4669 | - } | |
4670 | - | |
4671 | - /* FIXME : We assume here we have blocks | |
4672 | - that are twice as large as sectors. | |
4673 | - THIS MAY NOT BE TRUE !!! */ | |
4674 | - md_hd_struct[minor].start_sect=0; | |
4675 | - md_hd_struct[minor].nr_sects=md_size[minor]<<1; | |
4676 | - | |
4677 | - read_ahead[MD_MAJOR] = 128; | |
4678 | - return (0); | |
4679 | -} | |
4680 | +#define OUT_OF_MEM KERN_ALERT \ | |
4681 | +"md: out of memory.\n" | |
4682 | + | |
4683 | +#define NO_SB KERN_ERR \ | |
4684 | +"md: disabled device %s, could not read superblock.\n" | |
4685 | ||
4686 | -static int do_md_stop (int minor, struct inode *inode) | |
4687 | +#define BAD_CSUM KERN_WARNING \ | |
4688 | +"md: invalid superblock checksum on %s\n" | |
4689 | + | |
4690 | +static int alloc_array_sb (mddev_t * mddev) | |
4691 | { | |
4692 | - int i; | |
4693 | - | |
4694 | - if (inode->i_count>1 || md_dev[minor].busy>1) { | |
4695 | - /* | |
4696 | - * ioctl : one open channel | |
4697 | - */ | |
4698 | - printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", | |
4699 | - minor, inode->i_count, md_dev[minor].busy); | |
4700 | - return -EBUSY; | |
4701 | - } | |
4702 | - | |
4703 | - if (md_dev[minor].pers) { | |
4704 | - /* | |
4705 | - * It is safe to call stop here, it only frees private | |
4706 | - * data. Also, it tells us if a device is unstoppable | |
4707 | - * (eg. resyncing is in progress) | |
4708 | - */ | |
4709 | - if (md_dev[minor].pers->stop (minor, md_dev+minor)) | |
4710 | - return -EBUSY; | |
4711 | - /* | |
4712 | - * The device won't exist anymore -> flush it now | |
4713 | - */ | |
4714 | - fsync_dev (inode->i_rdev); | |
4715 | - invalidate_buffers (inode->i_rdev); | |
4716 | - if (md_dev[minor].sb) { | |
4717 | - md_dev[minor].sb->state |= 1 << MD_SB_CLEAN; | |
4718 | - md_update_sb(minor); | |
4719 | - } | |
4720 | + if (mddev->sb) { | |
4721 | + MD_BUG(); | |
4722 | + return 0; | |
4723 | } | |
4724 | - | |
4725 | - /* Remove locks. */ | |
4726 | - if (md_dev[minor].sb) | |
4727 | - free_sb(md_dev + minor); | |
4728 | - for (i=0; i<md_dev[minor].nb_dev; i++) | |
4729 | - clear_inode (md_dev[minor].devices[i].inode); | |
4730 | - | |
4731 | - md_dev[minor].nb_dev=md_size[minor]=0; | |
4732 | - md_hd_struct[minor].nr_sects=0; | |
4733 | - md_dev[minor].pers=NULL; | |
4734 | - | |
4735 | - read_ahead[MD_MAJOR] = 128; | |
4736 | - | |
4737 | - return (0); | |
4738 | + | |
4739 | + mddev->sb = (mdp_super_t *) __get_free_page (GFP_KERNEL); | |
4740 | + if (!mddev->sb) | |
4741 | + return -ENOMEM; | |
4742 | + md_clear_page((unsigned long)mddev->sb); | |
4743 | + return 0; | |
4744 | } | |
4745 | ||
4746 | -static int do_md_add (int minor, kdev_t dev) | |
4747 | +static int alloc_disk_sb (mdk_rdev_t * rdev) | |
4748 | { | |
4749 | - int i; | |
4750 | - int hot_add=0; | |
4751 | - struct real_dev *realdev; | |
4752 | + if (rdev->sb) | |
4753 | + MD_BUG(); | |
4754 | ||
4755 | - if (md_dev[minor].nb_dev==MAX_REAL) | |
4756 | + rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); | |
4757 | + if (!rdev->sb) { | |
4758 | + printk (OUT_OF_MEM); | |
4759 | return -EINVAL; | |
4760 | + } | |
4761 | + md_clear_page((unsigned long)rdev->sb); | |
4762 | ||
4763 | - if (!fs_may_mount (dev)) | |
4764 | - return -EBUSY; | |
4765 | + return 0; | |
4766 | +} | |
4767 | ||
4768 | - if (blk_size[MAJOR(dev)] == NULL || blk_size[MAJOR(dev)][MINOR(dev)] == 0) { | |
4769 | - printk("md_add(): zero device size, huh, bailing out.\n"); | |
4770 | - return -EINVAL; | |
4771 | +static void free_disk_sb (mdk_rdev_t * rdev) | |
4772 | +{ | |
4773 | + if (rdev->sb) { | |
4774 | + free_page((unsigned long) rdev->sb); | |
4775 | + rdev->sb = NULL; | |
4776 | + rdev->sb_offset = 0; | |
4777 | + rdev->size = 0; | |
4778 | + } else { | |
4779 | + if (!rdev->faulty) | |
4780 | + MD_BUG(); | |
4781 | } | |
4782 | +} | |
4783 | ||
4784 | - if (md_dev[minor].pers) { | |
4785 | - /* | |
4786 | - * The array is already running, hot-add the drive, or | |
4787 | - * bail out: | |
4788 | - */ | |
4789 | - if (!md_dev[minor].pers->hot_add_disk) | |
4790 | - return -EBUSY; | |
4791 | - else | |
4792 | - hot_add=1; | |
4793 | +static void mark_rdev_faulty (mdk_rdev_t * rdev) | |
4794 | +{ | |
4795 | + unsigned long flags; | |
4796 | + | |
4797 | + if (!rdev) { | |
4798 | + MD_BUG(); | |
4799 | + return; | |
4800 | } | |
4801 | + save_flags(flags); | |
4802 | + cli(); | |
4803 | + free_disk_sb(rdev); | |
4804 | + rdev->faulty = 1; | |
4805 | + restore_flags(flags); | |
4806 | +} | |
4807 | + | |
4808 | +static int read_disk_sb (mdk_rdev_t * rdev) | |
4809 | +{ | |
4810 | + int ret = -EINVAL; | |
4811 | + struct buffer_head *bh = NULL; | |
4812 | + kdev_t dev = rdev->dev; | |
4813 | + mdp_super_t *sb; | |
4814 | + u32 sb_offset; | |
4815 | ||
4816 | + if (!rdev->sb) { | |
4817 | + MD_BUG(); | |
4818 | + goto abort; | |
4819 | + } | |
4820 | + | |
4821 | /* | |
4822 | - * Careful. We cannot increase nb_dev for a running array. | |
4823 | + * Calculate the position of the superblock, | |
4824 | + * it's at the end of the disk | |
4825 | */ | |
4826 | - i=md_dev[minor].nb_dev; | |
4827 | - realdev = &md_dev[minor].devices[i]; | |
4828 | - realdev->dev=dev; | |
4829 | - | |
4830 | - /* Lock the device by inserting a dummy inode. This doesn't | |
4831 | - smell very good, but I need to be consistent with the | |
4832 | - mount stuff, specially with fs_may_mount. If someone have | |
4833 | - a better idea, please help ! */ | |
4834 | - | |
4835 | - realdev->inode=get_empty_inode (); | |
4836 | - realdev->inode->i_dev=dev; /* don't care about other fields */ | |
4837 | - insert_inode_hash (realdev->inode); | |
4838 | - | |
4839 | - /* Sizes are now rounded at run time */ | |
4840 | - | |
4841 | -/* md_dev[minor].devices[i].size=gen_real->sizes[MINOR(dev)]; HACKHACK*/ | |
4842 | - | |
4843 | - realdev->size=blk_size[MAJOR(dev)][MINOR(dev)]; | |
4844 | + sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1); | |
4845 | + rdev->sb_offset = sb_offset; | |
4846 | + printk("(read) %s's sb offset: %d", partition_name(dev), | |
4847 | + sb_offset); | |
4848 | + fsync_dev(dev); | |
4849 | + set_blocksize (dev, MD_SB_BYTES); | |
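| + /* sb_offset is in 1K blocks; with the blocksize set to MD_SB_BYTES, | 
| + * bread() indexes superblock-sized blocks, hence the /MD_SB_BLOCKS */ | 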
4850 | + bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
4851 | ||
4852 | - if (hot_add) { | |
4853 | + if (bh) { | |
4854 | + sb = (mdp_super_t *) bh->b_data; | |
4855 | + memcpy (rdev->sb, sb, MD_SB_BYTES); | |
4856 | + } else { | |
4857 | + printk (NO_SB,partition_name(rdev->dev)); | |
4858 | + goto abort; | |
4859 | + } | |
4860 | + printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events)); | |
4861 | + ret = 0; | |
4862 | +abort: | |
4863 | + if (bh) | |
4864 | + brelse (bh); | |
4865 | + return ret; | |
4866 | +} | |
4867 | + | |
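| +/* | 
| + * The checksum covers the whole superblock with the sb_csum field | 
| + * temporarily cleared, so the stored checksum never includes itself. | 
| + */ | 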
4868 | +static unsigned int calc_sb_csum (mdp_super_t * sb) | |
4869 | +{ | |
4870 | + unsigned int disk_csum, csum; | |
4871 | + | |
4872 | + disk_csum = sb->sb_csum; | |
4873 | + sb->sb_csum = 0; | |
4874 | + csum = csum_partial((void *)sb, MD_SB_BYTES, 0); | |
4875 | + sb->sb_csum = disk_csum; | |
4876 | + return csum; | |
4877 | +} | |
4878 | + | |
4879 | +/* | |
4880 | + * Check one RAID superblock for generic plausibility | |
4881 | + */ | |
4882 | + | |
4883 | +static int check_disk_sb (mdk_rdev_t * rdev) | |
4884 | +{ | |
4885 | + mdp_super_t *sb; | |
4886 | + int ret = -EINVAL; | |
4887 | + | |
4888 | + sb = rdev->sb; | |
4889 | + if (!sb) { | |
4890 | + MD_BUG(); | |
4891 | + goto abort; | |
4892 | + } | |
4893 | + | |
4894 | + if (sb->md_magic != MD_SB_MAGIC) { | |
4895 | + printk (BAD_MAGIC, partition_name(rdev->dev)); | |
4896 | + goto abort; | |
4897 | + } | |
4898 | + | |
4899 | + if (sb->md_minor >= MAX_MD_DEVS) { | |
4900 | + printk (BAD_MINOR, partition_name(rdev->dev), | |
4901 | + sb->md_minor); | |
4902 | + goto abort; | |
4903 | + } | |
4904 | + | |
4905 | + if (calc_sb_csum(sb) != sb->sb_csum) | |
4906 | + printk(BAD_CSUM, partition_name(rdev->dev)); | |
4907 | + ret = 0; | |
4908 | +abort: | |
4909 | + return ret; | |
4910 | +} | |
4911 | + | |
4912 | +static kdev_t dev_unit(kdev_t dev) | |
4913 | +{ | |
4914 | + unsigned int mask; | |
4915 | + struct gendisk *hd = find_gendisk(dev); | |
4916 | + | |
4917 | + if (!hd) | |
4918 | + return 0; | |
4919 | + mask = ~((1 << hd->minor_shift) - 1); | |
4920 | + | |
4921 | + return MKDEV(MAJOR(dev), MINOR(dev) & mask); | |
4922 | +} | |
4923 | + | |
4924 | +static mdk_rdev_t * match_dev_unit(mddev_t *mddev, kdev_t dev) | |
4925 | +{ | |
4926 | + struct md_list_head *tmp; | |
4927 | + mdk_rdev_t *rdev; | |
4928 | + | |
4929 | + ITERATE_RDEV(mddev,rdev,tmp) | |
4930 | + if (dev_unit(rdev->dev) == dev_unit(dev)) | |
4931 | + return rdev; | |
4932 | + | |
4933 | + return NULL; | |
4934 | +} | |
4935 | + | |
4936 | +static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |
4937 | +{ | |
4938 | + struct md_list_head *tmp; | |
4939 | + mdk_rdev_t *rdev; | |
4940 | + | |
4941 | + ITERATE_RDEV(mddev1,rdev,tmp) | |
4942 | + if (match_dev_unit(mddev2, rdev->dev)) | |
4943 | + return 1; | |
4944 | + | |
4945 | + return 0; | |
4946 | +} | |
4947 | + | |
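| +/* | 
| + * all_raid_disks: every rdev imported so far. | 
| + * pending_raid_disks: imported rdevs not yet bound to an array, | 
| + * waiting to be picked up by autorun_devices(). | 
| + */ | 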
4948 | +static MD_LIST_HEAD(all_raid_disks); | |
4949 | +static MD_LIST_HEAD(pending_raid_disks); | |
4950 | + | |
4951 | +static void bind_rdev_to_array (mdk_rdev_t * rdev, mddev_t * mddev) | |
4952 | +{ | |
4953 | + mdk_rdev_t *same_pdev; | |
4954 | + | |
4955 | + if (rdev->mddev) { | |
4956 | + MD_BUG(); | |
4957 | + return; | |
4958 | + } | |
4959 | + same_pdev = match_dev_unit(mddev, rdev->dev); | |
4960 | + if (same_pdev) | |
4961 | + printk( KERN_WARNING | |
4962 | +"md%d: WARNING: %s appears to be on the same physical disk as %s. True\n" | |
4963 | +" protection against single-disk failure might be compromised.\n", | |
4964 | + mdidx(mddev), partition_name(rdev->dev), | |
4965 | + partition_name(same_pdev->dev)); | |
4966 | + | |
4967 | + md_list_add(&rdev->same_set, &mddev->disks); | |
4968 | + rdev->mddev = mddev; | |
4969 | + mddev->nb_dev++; | |
4970 | + printk("bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev); | |
4971 | +} | |
4972 | + | |
4973 | +static void unbind_rdev_from_array (mdk_rdev_t * rdev) | |
4974 | +{ | |
4975 | + if (!rdev->mddev) { | |
4976 | + MD_BUG(); | |
4977 | + return; | |
4978 | + } | |
4979 | + md_list_del(&rdev->same_set); | |
4980 | + MD_INIT_LIST_HEAD(&rdev->same_set); | |
4981 | + rdev->mddev->nb_dev--; | |
4982 | + printk("unbind<%s,%d>\n", partition_name(rdev->dev), | |
4983 | + rdev->mddev->nb_dev); | |
4984 | + rdev->mddev = NULL; | |
4985 | +} | |
4986 | + | |
4987 | +/* | |
4988 | + * prevent the device from being mounted, repartitioned or | |
4989 | + * otherwise reused by a RAID array (or any other kernel | |
4990 | + * subsystem), by opening the device. [simply getting an | |
4991 | + * inode is not enough, the SCSI module usage code needs | |
4992 | + * an explicit open() on the device] | |
4993 | + */ | |
4994 | +static int lock_rdev (mdk_rdev_t *rdev) | |
4995 | +{ | |
4996 | + int err = 0; | |
4997 | + | |
4998 | + /* | |
4999 | + * First insert a dummy inode. | |
5000 | + */ | |
5001 | + if (rdev->inode) | |
5002 | + MD_BUG(); | |
5003 | + rdev->inode = get_empty_inode(); | |
5004 | + /* | |
5005 | + * we don't care about any other fields | 
5006 | + */ | |
5007 | + rdev->inode->i_dev = rdev->inode->i_rdev = rdev->dev; | |
5008 | + insert_inode_hash(rdev->inode); | |
5009 | + | |
5010 | + memset(&rdev->filp, 0, sizeof(rdev->filp)); | |
5011 | + rdev->filp.f_mode = 3; /* read write */ | |
5012 | + err = blkdev_open(rdev->inode, &rdev->filp); | |
5013 | + if (err) { | |
5014 | + printk("blkdev_open() failed: %d\n", err); | |
5015 | + clear_inode(rdev->inode); | |
5016 | + rdev->inode = NULL; | |
5017 | + } | |
5018 | + return err; | |
5019 | +} | |
5020 | + | |
5021 | +static void unlock_rdev (mdk_rdev_t *rdev) | |
5022 | +{ | |
5023 | + blkdev_release(rdev->inode); | |
5024 | + if (!rdev->inode) | |
5025 | + MD_BUG(); | |
5026 | + clear_inode(rdev->inode); | |
5027 | + rdev->inode = NULL; | |
5028 | +} | |
5029 | + | |
5030 | +static void export_rdev (mdk_rdev_t * rdev) | |
5031 | +{ | |
5032 | + printk("export_rdev(%s)\n",partition_name(rdev->dev)); | |
5033 | + if (rdev->mddev) | |
5034 | + MD_BUG(); | |
5035 | + unlock_rdev(rdev); | |
5036 | + free_disk_sb(rdev); | |
5037 | + md_list_del(&rdev->all); | |
5038 | + MD_INIT_LIST_HEAD(&rdev->all); | |
5039 | + if (rdev->pending.next != &rdev->pending) { | |
5040 | + printk("(%s was pending)\n",partition_name(rdev->dev)); | |
5041 | + md_list_del(&rdev->pending); | |
5042 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
5043 | + } | |
5044 | + rdev->dev = 0; | |
5045 | + rdev->faulty = 0; | |
5046 | + kfree(rdev); | |
5047 | +} | |
5048 | + | |
5049 | +static void kick_rdev_from_array (mdk_rdev_t * rdev) | |
5050 | +{ | |
5051 | + unbind_rdev_from_array(rdev); | |
5052 | + export_rdev(rdev); | |
5053 | +} | |
5054 | + | |
5055 | +static void export_array (mddev_t *mddev) | |
5056 | +{ | |
5057 | + struct md_list_head *tmp; | |
5058 | + mdk_rdev_t *rdev; | |
5059 | + mdp_super_t *sb = mddev->sb; | |
5060 | + | |
5061 | + if (mddev->sb) { | |
5062 | + mddev->sb = NULL; | |
5063 | + free_page((unsigned long) sb); | |
5064 | + } | |
5065 | + | |
5066 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5067 | + if (!rdev->mddev) { | |
5068 | + MD_BUG(); | |
5069 | + continue; | |
5070 | + } | |
5071 | + kick_rdev_from_array(rdev); | |
5072 | + } | |
5073 | + if (mddev->nb_dev) | |
5074 | + MD_BUG(); | |
5075 | +} | |
5076 | + | |
5077 | +#undef BAD_CSUM | |
5078 | +#undef BAD_MAGIC | |
5079 | +#undef OUT_OF_MEM | |
5080 | +#undef NO_SB | |
5081 | + | |
5082 | +static void print_desc(mdp_disk_t *desc) | |
5083 | +{ | |
5084 | + printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number, | |
5085 | + partition_name(MKDEV(desc->major,desc->minor)), | |
5086 | + desc->major,desc->minor,desc->raid_disk,desc->state); | |
5087 | +} | |
5088 | + | |
5089 | +static void print_sb(mdp_super_t *sb) | |
5090 | +{ | |
5091 | + int i; | |
5092 | + | |
5093 | + printk(" SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", | |
5094 | + sb->major_version, sb->minor_version, sb->patch_version, | |
5095 | + sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, | |
5096 | + sb->ctime); | |
5097 | + printk(" L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level, | |
5098 | + sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor, | |
5099 | + sb->layout, sb->chunk_size); | |
5100 | + printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n", | |
5101 | + sb->utime, sb->state, sb->active_disks, sb->working_disks, | |
5102 | + sb->failed_disks, sb->spare_disks, | |
5103 | + sb->sb_csum, (unsigned long)get_unaligned(&sb->events)); | |
5104 | + | |
5105 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
5106 | + mdp_disk_t *desc; | |
5107 | + | |
5108 | + desc = sb->disks + i; | |
5109 | + printk(" D %2d: ", i); | |
5110 | + print_desc(desc); | |
5111 | + } | |
5112 | + printk(" THIS: "); | |
5113 | + print_desc(&sb->this_disk); | |
5114 | + | |
5115 | +} | |
5116 | + | |
5117 | +static void print_rdev(mdk_rdev_t *rdev) | |
5118 | +{ | |
5119 | + printk(" rdev %s: O:%s, SZ:%08d F:%d DN:%d ", | |
5120 | + partition_name(rdev->dev), partition_name(rdev->old_dev), | |
5121 | + rdev->size, rdev->faulty, rdev->desc_nr); | |
5122 | + if (rdev->sb) { | |
5123 | + printk("rdev superblock:\n"); | |
5124 | + print_sb(rdev->sb); | |
5125 | + } else | |
5126 | + printk("no rdev superblock!\n"); | |
5127 | +} | |
5128 | + | |
5129 | +void md_print_devices (void) | |
5130 | +{ | |
5131 | + struct md_list_head *tmp, *tmp2; | |
5132 | + mdk_rdev_t *rdev; | |
5133 | + mddev_t *mddev; | |
5134 | + | |
5135 | + printk("\n"); | |
5136 | + printk(" **********************************\n"); | |
5137 | + printk(" * <COMPLETE RAID STATE PRINTOUT> *\n"); | |
5138 | + printk(" **********************************\n"); | |
5139 | + ITERATE_MDDEV(mddev,tmp) { | |
5140 | + printk("md%d: ", mdidx(mddev)); | |
5141 | + | |
5142 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
5143 | + printk("<%s>", partition_name(rdev->dev)); | |
5144 | + | |
5145 | + if (mddev->sb) { | |
5146 | + printk(" array superblock:\n"); | |
5147 | + print_sb(mddev->sb); | |
5148 | + } else | |
5149 | + printk(" no array superblock.\n"); | |
5150 | + | |
5151 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
5152 | + print_rdev(rdev); | |
5153 | + } | |
5154 | + printk(" **********************************\n"); | |
5155 | + printk("\n"); | |
5156 | +} | |
5157 | + | |
5158 | +static int sb_equal ( mdp_super_t *sb1, mdp_super_t *sb2) | |
5159 | +{ | |
5160 | + int ret; | |
5161 | + mdp_super_t *tmp1, *tmp2; | |
5162 | + | |
5163 | + tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); | |
5164 | + tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); | |
5165 | + | |
5166 | + if (!tmp1 || !tmp2) { | |
5167 | + ret = 0; | |
5168 | + goto abort; | |
5169 | + } | |
5170 | + | |
5171 | + *tmp1 = *sb1; | |
5172 | + *tmp2 = *sb2; | |
5173 | + | |
5174 | + /* | |
5175 | + * nr_disks is not constant | |
5176 | + */ | |
5177 | + tmp1->nr_disks = 0; | |
5178 | + tmp2->nr_disks = 0; | |
5179 | + | |
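| + /* compare only the generic constant part of the two superblocks */ | 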
5180 | + if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) | |
5181 | + ret = 0; | |
5182 | + else | |
5183 | + ret = 1; | |
5184 | + | |
5185 | +abort: | |
5186 | + if (tmp1) | |
5187 | + kfree(tmp1); | |
5188 | + if (tmp2) | |
5189 | + kfree(tmp2); | |
5190 | + | |
5191 | + return ret; | |
5192 | +} | |
5193 | + | |
5194 | +static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2) | |
5195 | +{ | |
5196 | + if ( (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) && | |
5197 | + (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) && | |
5198 | + (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) && | |
5199 | + (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3)) | |
5200 | + | |
5201 | + return 1; | |
5202 | + | |
5203 | + return 0; | |
5204 | +} | |
5205 | + | |
5206 | +static mdk_rdev_t * find_rdev_all (kdev_t dev) | |
5207 | +{ | |
5208 | + struct md_list_head *tmp; | |
5209 | + mdk_rdev_t *rdev; | |
5210 | + | |
5211 | + tmp = all_raid_disks.next; | |
5212 | + while (tmp != &all_raid_disks) { | |
5213 | + rdev = md_list_entry(tmp, mdk_rdev_t, all); | |
5214 | + if (rdev->dev == dev) | |
5215 | + return rdev; | |
5216 | + tmp = tmp->next; | |
5217 | + } | |
5218 | + return NULL; | |
5219 | +} | |
5220 | + | |
5221 | +#define GETBLK_FAILED KERN_ERR \ | |
5222 | +"md: getblk failed for device %s\n" | |
5223 | + | |
5224 | +static int write_disk_sb(mdk_rdev_t * rdev) | |
5225 | +{ | |
5226 | + struct buffer_head *bh; | |
5227 | + kdev_t dev; | |
5228 | + u32 sb_offset, size; | |
5229 | + mdp_super_t *sb; | |
5230 | + | |
5231 | + if (!rdev->sb) { | |
5232 | + MD_BUG(); | |
5233 | + return -1; | |
5234 | + } | |
5235 | + if (rdev->faulty) { | |
5236 | + MD_BUG(); | |
5237 | + return -1; | |
5238 | + } | |
5239 | + if (rdev->sb->md_magic != MD_SB_MAGIC) { | |
5240 | + MD_BUG(); | |
5241 | + return -1; | |
5242 | + } | |
5243 | + | |
5244 | + dev = rdev->dev; | |
5245 | + sb_offset = calc_dev_sboffset(dev, rdev->mddev, 1); | |
5246 | + if (rdev->sb_offset != sb_offset) { | |
5247 | + printk("%s's sb offset has changed from %d to %d, skipping\n", partition_name(dev), rdev->sb_offset, sb_offset); | |
5248 | + goto skip; | |
5249 | + } | |
5250 | + /* | |
5251 | + * If the disk went offline meanwhile and it's just a spare, then | |
5252 | + * its size has changed to zero silently, and the MD code does | 
5253 | + * not yet know that it's faulty. | |
5254 | + */ | |
5255 | + size = calc_dev_size(dev, rdev->mddev, 1); | |
5256 | + if (size != rdev->size) { | |
5257 | + printk("%s's size has changed from %d to %d since import, skipping\n", partition_name(dev), rdev->size, size); | |
5258 | + goto skip; | |
5259 | + } | |
5260 | + | |
5261 | + printk("(write) %s's sb offset: %d\n", partition_name(dev), sb_offset); | |
5262 | + fsync_dev(dev); | |
5263 | + set_blocksize(dev, MD_SB_BYTES); | |
5264 | + bh = getblk(dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES); | |
5265 | + if (!bh) { | |
5266 | + printk(GETBLK_FAILED, partition_name(dev)); | |
5267 | + return 1; | |
5268 | + } | |
5269 | + memset(bh->b_data,0,bh->b_size); | |
5270 | + sb = (mdp_super_t *) bh->b_data; | |
5271 | + memcpy(sb, rdev->sb, MD_SB_BYTES); | |
5272 | + | |
5273 | + mark_buffer_uptodate(bh, 1); | |
5274 | + mark_buffer_dirty(bh, 1); | |
5275 | + ll_rw_block(WRITE, 1, &bh); | |
5276 | + wait_on_buffer(bh); | |
5277 | + brelse(bh); | |
5278 | + fsync_dev(dev); | |
5279 | +skip: | |
5280 | + return 0; | |
5281 | +} | |
5282 | +#undef GETBLK_FAILED | 
5283 | + | |
5284 | +static void set_this_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |
5285 | +{ | |
5286 | + int i, ok = 0; | |
5287 | + mdp_disk_t *desc; | |
5288 | + | |
5289 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
5290 | + desc = mddev->sb->disks + i; | |
5291 | +#if 0 | |
5292 | + if (disk_faulty(desc)) { | |
5293 | + if (MKDEV(desc->major,desc->minor) == rdev->dev) | |
5294 | + ok = 1; | |
5295 | + continue; | |
5296 | + } | |
5297 | +#endif | |
5298 | + if (MKDEV(desc->major,desc->minor) == rdev->dev) { | |
5299 | + rdev->sb->this_disk = *desc; | |
5300 | + rdev->desc_nr = desc->number; | |
5301 | + ok = 1; | |
5302 | + break; | |
5303 | + } | |
5304 | + } | |
5305 | + | |
5306 | + if (!ok) { | |
5307 | + MD_BUG(); | |
5308 | + } | |
5309 | +} | |
5310 | + | |
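| +/* | 
| + * Copy the array-wide superblock into every non-faulty rdev, patch in | 
| + * the per-disk this_disk descriptor and recompute each checksum. | 
| + */ | 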
5311 | +static int sync_sbs(mddev_t * mddev) | |
5312 | +{ | |
5313 | + mdk_rdev_t *rdev; | |
5314 | + mdp_super_t *sb; | |
5315 | + struct md_list_head *tmp; | |
5316 | + | |
5317 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5318 | + if (rdev->faulty) | |
5319 | + continue; | |
5320 | + sb = rdev->sb; | |
5321 | + *sb = *mddev->sb; | |
5322 | + set_this_disk(mddev, rdev); | |
5323 | + sb->sb_csum = calc_sb_csum(sb); | |
5324 | + } | |
5325 | + return 0; | |
5326 | +} | |
5327 | + | |
5328 | +int md_update_sb(mddev_t * mddev) | |
5329 | +{ | |
5330 | + int first, err, count = 100; | |
5331 | + struct md_list_head *tmp; | |
5332 | + mdk_rdev_t *rdev; | |
5333 | + __u64 ev; | |
5334 | + | |
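| + /* | 
| + * the whole update is retried (up to 'count' times) if any of | 
| + * the individual superblock writes fails | 
| + */ | 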
5335 | +repeat: | |
5336 | + mddev->sb->utime = CURRENT_TIME; | |
5337 | + ev = get_unaligned(&mddev->sb->events); | |
5338 | + ++ev; | |
5339 | + put_unaligned(ev,&mddev->sb->events); | |
5340 | + if (ev == (__u64)0) { | |
5341 | + /* | |
5342 | + * oops, this 64-bit counter should never wrap. | |
5343 | + * Either we are somewhere around ~1 trillion A.D., assuming | 
5344 | + * 1 reboot per second, or we have a bug: | |
5345 | + */ | |
5346 | + MD_BUG(); | |
5347 | + --ev; | |
5348 | + put_unaligned(ev,&mddev->sb->events); | |
5349 | + } | |
5350 | + sync_sbs(mddev); | |
5351 | + | |
5352 | + /* | |
5353 | + * do not write anything to disk if using | |
5354 | + * nonpersistent superblocks | |
5355 | + */ | |
5356 | + if (mddev->sb->not_persistent) | |
5357 | + return 0; | |
5358 | + | |
5359 | + printk(KERN_INFO "md: updating md%d RAID superblock on device\n", | |
5360 | + mdidx(mddev)); | |
5361 | + | |
5362 | + first = 1; | |
5363 | + err = 0; | |
5364 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5365 | + if (!first) { | 
5366 | + printk(", "); | 
5367 | + } | 
5368 | + first = 0; | 
5369 | + if (rdev->faulty) | |
5370 | + printk("(skipping faulty "); | |
5371 | + printk("%s ", partition_name(rdev->dev)); | |
5372 | + if (!rdev->faulty) { | |
5373 | + printk("[events: %08lx]", | |
5374 | + (unsigned long)get_unaligned(&rdev->sb->events)); | |
5375 | + err += write_disk_sb(rdev); | |
5376 | + } else | |
5377 | + printk(")\n"); | |
5378 | + } | |
5379 | + printk(".\n"); | |
5380 | + if (err) { | |
5381 | + printk("errors occured during superblock update, repeating\n"); | |
5382 | + if (--count) | |
5383 | + goto repeat; | |
5384 | + printk("excessive errors occured during superblock update, exiting\n"); | |
5385 | + } | |
5386 | + return 0; | |
5387 | +} | |
5388 | + | |
5389 | +/* | |
5390 | + * Import a device. If 'on_disk', then sanity check the superblock | |
5391 | + * | |
5392 | + * mark the device faulty if: | |
5393 | + * | |
5394 | + * - the device is nonexistent (zero size) | |
5395 | + * - the device has no valid superblock | |
5396 | + * | |
5397 | + * a faulty rdev _never_ has rdev->sb set. | |
5398 | + */ | |
5399 | +static int md_import_device (kdev_t newdev, int on_disk) | |
5400 | +{ | |
5401 | + int err; | |
5402 | + mdk_rdev_t *rdev; | |
5403 | + unsigned int size; | |
5404 | + | |
5405 | + if (find_rdev_all(newdev)) | |
5406 | + return -EEXIST; | |
5407 | + | |
5408 | + rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); | |
5409 | + if (!rdev) { | |
5410 | + printk("could not alloc mem for %s!\n", partition_name(newdev)); | |
5411 | + return -ENOMEM; | |
5412 | + } | |
5413 | + memset(rdev, 0, sizeof(*rdev)); | |
5414 | + | |
5415 | + if (!fs_may_mount(newdev)) { | |
5416 | + printk("md: can not import %s, has active inodes!\n", | |
5417 | + partition_name(newdev)); | |
5418 | + err = -EBUSY; | |
5419 | + goto abort_free; | |
5420 | + } | |
5421 | + | |
5422 | + if ((err = alloc_disk_sb(rdev))) | |
5423 | + goto abort_free; | |
5424 | + | |
5425 | + rdev->dev = newdev; | |
5426 | + if (lock_rdev(rdev)) { | |
5427 | + printk("md: could not lock %s, zero-size? Marking faulty.\n", | |
5428 | + partition_name(newdev)); | |
5429 | + err = -EINVAL; | |
5430 | + goto abort_free; | |
5431 | + } | |
5432 | + rdev->desc_nr = -1; | |
5433 | + rdev->faulty = 0; | |
5434 | + | |
5435 | + size = 0; | |
5436 | + if (blk_size[MAJOR(newdev)]) | |
5437 | + size = blk_size[MAJOR(newdev)][MINOR(newdev)]; | |
5438 | + if (!size) { | |
5439 | + printk("md: %s has zero size, marking faulty!\n", | |
5440 | + partition_name(newdev)); | |
5441 | + err = -EINVAL; | |
5442 | + goto abort_free; | |
5443 | + } | |
5444 | + | |
5445 | + if (on_disk) { | |
5446 | + if ((err = read_disk_sb(rdev))) { | |
5447 | + printk("md: could not read %s's sb, not importing!\n", | |
5448 | + partition_name(newdev)); | |
5449 | + goto abort_free; | |
5450 | + } | |
5451 | + if ((err = check_disk_sb(rdev))) { | |
5452 | + printk("md: %s has invalid sb, not importing!\n", | |
5453 | + partition_name(newdev)); | |
5454 | + goto abort_free; | |
5455 | + } | |
5456 | + | |
5457 | + rdev->old_dev = MKDEV(rdev->sb->this_disk.major, | |
5458 | + rdev->sb->this_disk.minor); | |
5459 | + rdev->desc_nr = rdev->sb->this_disk.number; | |
5460 | + } | |
5461 | + md_list_add(&rdev->all, &all_raid_disks); | |
5462 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
5463 | + | |
5464 | + if (rdev->faulty && rdev->sb) | |
5465 | + free_disk_sb(rdev); | |
5466 | + return 0; | |
5467 | + | |
5468 | +abort_free: | |
5469 | + if (rdev->sb) { | |
5470 | + if (rdev->inode) | |
5471 | + unlock_rdev(rdev); | |
5472 | + free_disk_sb(rdev); | |
5473 | + } | |
5474 | + kfree(rdev); | |
5475 | + return err; | |
5476 | +} | |
5477 | + | |
5478 | +/* | |
5479 | + * Check a full RAID array for plausibility | |
5480 | + */ | |
5481 | + | |
5482 | +#define INCONSISTENT KERN_ERR \ | |
5483 | +"md: fatal superblock inconsistency in %s -- removing from array\n" | |
5484 | + | |
5485 | +#define OUT_OF_DATE KERN_ERR \ | |
5486 | +"md: superblock update time inconsistency -- using the most recent one\n" | |
5487 | + | |
5488 | +#define OLD_VERSION KERN_ALERT \ | |
5489 | +"md: md%d: unsupported raid array version %d.%d.%d\n" | |
5490 | + | |
5491 | +#define NOT_CLEAN_IGNORE KERN_ERR \ | |
5492 | +"md: md%d: raid array is not clean -- starting background reconstruction\n" | |
5493 | + | |
5494 | +#define UNKNOWN_LEVEL KERN_ERR \ | |
5495 | +"md: md%d: unsupported raid level %d\n" | |
5496 | + | |
5497 | +static int analyze_sbs (mddev_t * mddev) | |
5498 | +{ | |
5499 | + int out_of_date = 0, i; | |
5500 | + struct md_list_head *tmp, *tmp2; | |
5501 | + mdk_rdev_t *rdev, *rdev2, *freshest; | |
5502 | + mdp_super_t *sb; | |
5503 | + | |
5504 | + /* | |
5505 | + * Verify the RAID superblock on each real device | |
5506 | + */ | |
5507 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5508 | + if (rdev->faulty) { | |
5509 | + MD_BUG(); | |
5510 | + goto abort; | |
5511 | + } | |
5512 | + if (!rdev->sb) { | |
5513 | + MD_BUG(); | |
5514 | + goto abort; | |
5515 | + } | |
5516 | + if (check_disk_sb(rdev)) | |
5517 | + goto abort; | |
5518 | + } | |
5519 | + | |
5520 | + /* | |
5521 | + * The superblock constant part has to be the same | |
5522 | + * for all disks in the array. | |
5523 | + */ | |
5524 | + sb = NULL; | |
5525 | + | |
5526 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5527 | + if (!sb) { | |
5528 | + sb = rdev->sb; | |
5529 | + continue; | |
5530 | + } | |
5531 | + if (!sb_equal(sb, rdev->sb)) { | |
5532 | + printk (INCONSISTENT, partition_name(rdev->dev)); | |
5533 | + kick_rdev_from_array(rdev); | |
5534 | + continue; | |
5535 | + } | |
5536 | + } | |
5537 | + | |
5538 | + /* | |
5539 | + * OK, we have all disks and the array is ready to run. Let's | |
5540 | + * find the freshest superblock, that one will be the superblock | |
5541 | + * that represents the whole array. | |
5542 | + */ | |
5543 | + if (!mddev->sb) | |
5544 | + if (alloc_array_sb(mddev)) | |
5545 | + goto abort; | |
5546 | + sb = mddev->sb; | |
5547 | + freshest = NULL; | |
5548 | + | |
5549 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5550 | + __u64 ev1, ev2; | |
5551 | + /* | |
5552 | + * if the checksum is invalid, use the superblock | |
5553 | + * only as a last resort. (decrease its age by | 
5554 | + * one event) | |
5555 | + */ | |
5556 | + if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) { | |
5557 | + __u64 ev = get_unaligned(&rdev->sb->events); | |
5558 | + if (ev != (__u64)0) { | |
5559 | + --ev; | |
5560 | + put_unaligned(ev,&rdev->sb->events); | |
5561 | + } | |
5562 | + } | |
5563 | + | |
5564 | + printk("%s's event counter: %08lx\n", partition_name(rdev->dev), | |
5565 | + (unsigned long)get_unaligned(&rdev->sb->events)); | |
5566 | + if (!freshest) { | |
5567 | + freshest = rdev; | |
5568 | + continue; | |
5569 | + } | |
5570 | + /* | |
5571 | + * Find the newest superblock version | |
5572 | + */ | |
5573 | + ev1 = get_unaligned(&rdev->sb->events); | |
5574 | + ev2 = get_unaligned(&freshest->sb->events); | |
5575 | + if (ev1 != ev2) { | |
5576 | + out_of_date = 1; | |
5577 | + if (ev1 > ev2) | |
5578 | + freshest = rdev; | |
5579 | + } | |
5580 | + } | |
5581 | + if (out_of_date) { | |
5582 | + printk(OUT_OF_DATE); | |
5583 | + printk("freshest: %s\n", partition_name(freshest->dev)); | |
5584 | + } | |
5585 | + memcpy (sb, freshest->sb, sizeof(*sb)); | |
5586 | + | |
5587 | + /* | |
5588 | + * at this point we have picked the 'best' superblock | |
5589 | + * from all available superblocks. | |
5590 | + * now we validate this superblock and kick out possibly | |
5591 | + * failed disks. | |
5592 | + */ | |
5593 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5594 | + /* | |
5595 | + * Kick all non-fresh devices faulty | |
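| + * (a device lagging by exactly one event is still accepted, | 
| + * hence the ++ev1 below) | 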
5596 | + */ | |
5597 | + __u64 ev1, ev2; | |
5598 | + ev1 = get_unaligned(&rdev->sb->events); | |
5599 | + ev2 = get_unaligned(&sb->events); | |
5600 | + ++ev1; | |
5601 | + if (ev1 < ev2) { | |
5602 | + printk("md: kicking non-fresh %s from array!\n", | |
5603 | + partition_name(rdev->dev)); | |
5604 | + kick_rdev_from_array(rdev); | |
5605 | + continue; | |
5606 | + } | |
5607 | + } | |
5608 | + | |
5609 | + /* | |
5610 | + * Fix up changed device names ... but only if this disk has a | |
5611 | + * recent update time. Superblocks with a bad checksum are used too. | 
5612 | + */ | |
5613 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5614 | + __u64 ev1, ev2, ev3; | |
5615 | + if (rdev->faulty) { /* REMOVEME */ | |
5616 | + MD_BUG(); | |
5617 | + goto abort; | |
5618 | + } | |
5619 | + ev1 = get_unaligned(&rdev->sb->events); | |
5620 | + ev2 = get_unaligned(&sb->events); | |
5621 | + ev3 = ev2; | |
5622 | + --ev3; | |
5623 | + if ((rdev->dev != rdev->old_dev) && | |
5624 | + ((ev1 == ev2) || (ev1 == ev3))) { | |
5625 | + mdp_disk_t *desc; | |
5626 | + | |
5627 | + printk("md: device name has changed from %s to %s since last import!\n", partition_name(rdev->old_dev), partition_name(rdev->dev)); | |
5628 | + if (rdev->desc_nr == -1) { | |
5629 | + MD_BUG(); | |
5630 | + goto abort; | |
5631 | + } | |
5632 | + desc = &sb->disks[rdev->desc_nr]; | |
5633 | + if (rdev->old_dev != MKDEV(desc->major, desc->minor)) { | |
5634 | + MD_BUG(); | |
5635 | + goto abort; | |
5636 | + } | |
5637 | + desc->major = MAJOR(rdev->dev); | |
5638 | + desc->minor = MINOR(rdev->dev); | |
5639 | + desc = &rdev->sb->this_disk; | |
5640 | + desc->major = MAJOR(rdev->dev); | |
5641 | + desc->minor = MINOR(rdev->dev); | |
5642 | + } | |
5643 | + } | |
5644 | + | |
5645 | + /* | |
5646 | + * Remove unavailable and faulty devices ... | |
5647 | + * | |
5648 | + * note that if an array becomes completely unrunnable due to | |
5649 | + * missing devices, we do not write the superblock back, so the | |
5650 | + * administrator has a chance to fix things up. The removal thus | |
5651 | + * only happens if it's nonfatal to the contents of the array. | |
5652 | + */ | |
5653 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
5654 | + int found; | |
5655 | + mdp_disk_t *desc; | |
5656 | + kdev_t dev; | |
5657 | + | |
5658 | + desc = sb->disks + i; | |
5659 | + dev = MKDEV(desc->major, desc->minor); | |
5660 | + | |
5661 | + /* | |
5662 | + * We kick faulty devices/descriptors immediately. | |
5663 | + */ | |
5664 | + if (disk_faulty(desc)) { | |
5665 | + found = 0; | |
5666 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5667 | + if (rdev->desc_nr != desc->number) | |
5668 | + continue; | |
5669 | + printk("md%d: kicking faulty %s!\n", | |
5670 | + mdidx(mddev),partition_name(rdev->dev)); | |
5671 | + kick_rdev_from_array(rdev); | |
5672 | + found = 1; | |
5673 | + break; | |
5674 | + } | |
5675 | + if (!found) { | |
5676 | + if (dev == MKDEV(0,0)) | |
5677 | + continue; | |
5678 | + printk("md%d: removing former faulty %s!\n", | |
5679 | + mdidx(mddev), partition_name(dev)); | |
5680 | + } | |
5681 | + remove_descriptor(desc, sb); | |
5682 | + continue; | |
5683 | + } | |
5684 | + | |
5685 | + if (dev == MKDEV(0,0)) | |
5686 | + continue; | |
5687 | + /* | |
5688 | + * Is this device present in the rdev ring? | |
5689 | + */ | |
5690 | + found = 0; | |
5691 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5692 | + if (rdev->desc_nr == desc->number) { | |
5693 | + found = 1; | |
5694 | + break; | |
5695 | + } | |
5696 | + } | |
5697 | + if (found) | |
5698 | + continue; | |
5699 | + | |
5700 | + printk("md%d: former device %s is unavailable, removing from array!\n", mdidx(mddev), partition_name(dev)); | |
5701 | + remove_descriptor(desc, sb); | |
5702 | + } | |
5703 | + | |
5704 | + /* | |
5705 | + * Double check wether all devices mentioned in the | |
5706 | + * superblock are in the rdev ring. | |
5707 | + */ | |
5708 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
5709 | + mdp_disk_t *desc; | |
5710 | + kdev_t dev; | |
5711 | + | |
5712 | + desc = sb->disks + i; | |
5713 | + dev = MKDEV(desc->major, desc->minor); | |
5714 | + | |
5715 | + if (dev == MKDEV(0,0)) | |
5716 | + continue; | |
5717 | + | |
5718 | + if (disk_faulty(desc)) { | |
5719 | + MD_BUG(); | |
5720 | + goto abort; | |
5721 | + } | |
5722 | + | |
5723 | + rdev = find_rdev(mddev, dev); | |
5724 | + if (!rdev) { | |
5725 | + MD_BUG(); | |
5726 | + goto abort; | |
5727 | + } | |
5728 | + } | |
5729 | + | |
5730 | + /* | |
5731 | + * Do a final reality check. | |
5732 | + */ | |
5733 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5734 | + if (rdev->desc_nr == -1) { | |
5735 | + MD_BUG(); | |
5736 | + goto abort; | |
5737 | + } | |
5738 | + /* | |
5739 | + * is the desc_nr unique? | |
5740 | + */ | |
5741 | + ITERATE_RDEV(mddev,rdev2,tmp2) { | |
5742 | + if ((rdev2 != rdev) && | |
5743 | + (rdev2->desc_nr == rdev->desc_nr)) { | |
5744 | + MD_BUG(); | |
5745 | + goto abort; | |
5746 | + } | |
5747 | + } | |
5748 | + /* | |
5749 | + * is the device unique? | |
5750 | + */ | |
5751 | + ITERATE_RDEV(mddev,rdev2,tmp2) { | |
5752 | + if ((rdev2 != rdev) && | |
5753 | + (rdev2->dev == rdev->dev)) { | |
5754 | + MD_BUG(); | |
5755 | + goto abort; | |
5756 | + } | |
5757 | + } | |
5758 | + } | |
5759 | + | |
5760 | + /* | |
5761 | + * Check if we can support this RAID array | |
5762 | + */ | |
5763 | + if (sb->major_version != MD_MAJOR_VERSION || | |
5764 | + sb->minor_version > MD_MINOR_VERSION) { | |
5765 | + | |
5766 | + printk (OLD_VERSION, mdidx(mddev), sb->major_version, | |
5767 | + sb->minor_version, sb->patch_version); | |
5768 | + goto abort; | |
5769 | + } | |
5770 | + | |
5771 | + if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) || | |
5772 | + (sb->level == 4) || (sb->level == 5))) | |
5773 | + printk (NOT_CLEAN_IGNORE, mdidx(mddev)); | |
5774 | + | |
5775 | + return 0; | |
5776 | +abort: | |
5777 | + return 1; | |
5778 | +} | |
5779 | + | |
5780 | +#undef INCONSISTENT | |
5781 | +#undef OUT_OF_DATE | |
5782 | +#undef OLD_VERSION | |
5783 | +#undef OLD_LEVEL | |
5784 | + | |
5785 | +static int device_size_calculation (mddev_t * mddev) | |
5786 | +{ | |
5787 | + int data_disks = 0, persistent; | |
5788 | + unsigned int readahead; | |
5789 | + mdp_super_t *sb = mddev->sb; | |
5790 | + struct md_list_head *tmp; | |
5791 | + mdk_rdev_t *rdev; | |
5792 | + | |
5793 | + /* | |
5794 | + * Do device size calculation. Bail out if too small. | |
5795 | + * (we have to do this after having validated chunk_size, | |
5796 | + * because device size has to be modulo chunk_size) | |
5797 | + */ | |
5798 | + persistent = !mddev->sb->not_persistent; | |
5799 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5800 | + if (rdev->faulty) | |
5801 | + continue; | |
5802 | + if (rdev->size) { | |
5803 | + MD_BUG(); | |
5804 | + continue; | |
5805 | + } | |
5806 | + rdev->size = calc_dev_size(rdev->dev, mddev, persistent); | |
5807 | + if (rdev->size < sb->chunk_size / 1024) { | |
5808 | + printk (KERN_WARNING | |
5809 | + "Dev %s smaller than chunk_size: %dk < %dk\n", | |
5810 | + partition_name(rdev->dev), | |
5811 | + rdev->size, sb->chunk_size / 1024); | |
5812 | + return -EINVAL; | |
5813 | + } | |
5814 | + } | |
5815 | + | |
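| + /* | 
| + * negative levels in this patch: -1 linear, -2 translucent, -3 HSM | 
| + */ | 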
5816 | + switch (sb->level) { | |
5817 | + case -3: | |
5818 | + data_disks = 1; | |
5819 | + break; | |
5820 | + case -2: | |
5821 | + data_disks = 1; | |
5822 | + break; | |
5823 | + case -1: | |
5824 | + zoned_raid_size(mddev); | |
5825 | + data_disks = 1; | |
5826 | + break; | |
5827 | + case 0: | |
5828 | + zoned_raid_size(mddev); | |
5829 | + data_disks = sb->raid_disks; | |
5830 | + break; | |
5831 | + case 1: | |
5832 | + data_disks = 1; | |
5833 | + break; | |
5834 | + case 4: | |
5835 | + case 5: | |
5836 | + data_disks = sb->raid_disks-1; | |
5837 | + break; | |
5838 | + default: | |
5839 | + printk (UNKNOWN_LEVEL, mdidx(mddev), sb->level); | |
5840 | + goto abort; | |
5841 | + } | |
5842 | + if (!md_size[mdidx(mddev)]) | |
5843 | + md_size[mdidx(mddev)] = sb->size * data_disks; | |
5844 | + | |
5845 | + readahead = MD_READAHEAD; | |
5846 | + if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) | |
5847 | + readahead = mddev->sb->chunk_size * 4 * data_disks; | |
5848 | + if (readahead < data_disks * MAX_SECTORS*512*2) | |
5849 | + readahead = data_disks * MAX_SECTORS*512*2; | |
5850 | + else { | |
5851 | + if (sb->level == -3) | |
5852 | + readahead = 0; | |
5853 | + } | |
5854 | + md_maxreadahead[mdidx(mddev)] = readahead; | |
5855 | + | |
5856 | + printk(KERN_INFO "md%d: max total readahead window set to %dk\n", | |
5857 | + mdidx(mddev), readahead/1024); | |
5858 | + | |
5859 | + printk(KERN_INFO | |
5860 | + "md%d: %d data-disks, max readahead per data-disk: %dk\n", | |
5861 | + mdidx(mddev), data_disks, readahead/data_disks/1024); | |
5862 | + return 0; | |
5863 | +abort: | |
5864 | + return 1; | |
5865 | +} | |
5866 | + | |
5867 | + | |
5868 | +#define TOO_BIG_CHUNKSIZE KERN_ERR \ | |
5869 | +"too big chunk_size: %d > %d\n" | |
5870 | + | |
5871 | +#define TOO_SMALL_CHUNKSIZE KERN_ERR \ | |
5872 | +"too small chunk_size: %d < %ld\n" | |
5873 | + | |
5874 | +#define BAD_CHUNKSIZE KERN_ERR \ | |
5875 | +"no chunksize specified, see 'man raidtab'\n" | |
5876 | + | |
5877 | +static int do_md_run (mddev_t * mddev) | |
5878 | +{ | |
5879 | + int pnum, err; | |
5880 | + int chunk_size; | |
5881 | + struct md_list_head *tmp; | |
5882 | + mdk_rdev_t *rdev; | |
5883 | + | |
5884 | + | |
5885 | + if (!mddev->nb_dev) { | |
5886 | + MD_BUG(); | |
5887 | + return -EINVAL; | |
5888 | + } | |
5889 | + | |
5890 | + if (mddev->pers) | |
5891 | + return -EBUSY; | |
5892 | + | |
5893 | + /* | |
5894 | + * Resize disks to align partition sizes on a given | 
5895 | + * chunk size. | |
5896 | + */ | |
5897 | + md_size[mdidx(mddev)] = 0; | |
5898 | + | |
5899 | + /* | |
5900 | + * Analyze all RAID superblock(s) | |
5901 | + */ | |
5902 | + if (analyze_sbs(mddev)) { | |
5903 | + MD_BUG(); | |
5904 | + return -EINVAL; | |
5905 | + } | |
5906 | + | |
5907 | + chunk_size = mddev->sb->chunk_size; | |
5908 | + pnum = level_to_pers(mddev->sb->level); | |
5909 | + | |
5910 | + mddev->param.chunk_size = chunk_size; | |
5911 | + mddev->param.personality = pnum; | |
5912 | + | |
5913 | + if (chunk_size > MAX_CHUNK_SIZE) { | |
5914 | + printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE); | |
5915 | + return -EINVAL; | |
5916 | + } | |
5917 | + /* | |
5918 | + * chunk-size has to be a power of 2 and a multiple of PAGE_SIZE | 
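| + * (ffz(~x) is the index of the lowest set bit of x, so the test | 
| + * below only passes when exactly one bit is set) | 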
5919 | + */ | |
5920 | + if ( (1 << ffz(~chunk_size)) != chunk_size) { | |
5921 | + MD_BUG(); | |
5922 | + return -EINVAL; | |
5923 | + } | |
5924 | + if (chunk_size < PAGE_SIZE) { | |
5925 | + printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE); | |
5926 | + return -EINVAL; | |
5927 | + } | |
5928 | + | |
5929 | + if (pnum >= MAX_PERSONALITY) { | |
5930 | + MD_BUG(); | |
5931 | + return -EINVAL; | |
5932 | + } | |
5933 | + | |
5934 | + if ((pnum != RAID1) && (pnum != LINEAR) && !chunk_size) { | |
5935 | + /* | |
5936 | + * 'default chunksize' in the old md code used to | |
5937 | + * be PAGE_SIZE, baaad. | |
5938 | + * we abort here to be on the safe side. We don't | 
5939 | + * want to continue the bad practice. | |
5940 | + */ | |
5941 | + printk(BAD_CHUNKSIZE); | |
5942 | + return -EINVAL; | |
5943 | + } | |
5944 | + | |
5945 | + if (!pers[pnum]) | |
5946 | + { | |
5947 | +#ifdef CONFIG_KMOD | |
5948 | + char module_name[80]; | |
5949 | + sprintf (module_name, "md-personality-%d", pnum); | |
5950 | + request_module (module_name); | |
5951 | + if (!pers[pnum]) | |
5952 | +#endif | |
5953 | + return -EINVAL; | |
5954 | + } | |
5955 | + | |
5956 | + if (device_size_calculation(mddev)) | |
5957 | + return -EINVAL; | |
5958 | + | |
5959 | + /* | |
5960 | + * Drop all container device buffers, from now on | |
5961 | + * the only valid external interface is through the md | |
5962 | + * device. | |
5963 | + */ | |
5964 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
5965 | + if (rdev->faulty) | |
5966 | + continue; | |
5967 | + fsync_dev(rdev->dev); | |
5968 | + invalidate_buffers(rdev->dev); | |
5969 | + } | |
5970 | + | |
5971 | + mddev->pers = pers[pnum]; | |
5972 | + | |
5973 | + err = mddev->pers->run(mddev); | |
5974 | + if (err) { | |
5975 | + printk("pers->run() failed ...\n"); | |
5976 | + mddev->pers = NULL; | |
5977 | + return -EINVAL; | |
5978 | + } | |
5979 | + | |
5980 | + mddev->sb->state &= ~(1 << MD_SB_CLEAN); | |
5981 | + md_update_sb(mddev); | |
5982 | + | |
5983 | + /* | |
5984 | + * md_size has units of 1K blocks, which are | |
5985 | + * twice as large as sectors. | |
5986 | + */ | |
5987 | + md_hd_struct[mdidx(mddev)].start_sect = 0; | |
5988 | + md_hd_struct[mdidx(mddev)].nr_sects = md_size[mdidx(mddev)] << 1; | |
5989 | + | |
5990 | + read_ahead[MD_MAJOR] = 1024; | |
5991 | + return (0); | |
5992 | +} | |
5993 | + | |
5994 | +#undef TOO_BIG_CHUNKSIZE | |
5995 | +#undef BAD_CHUNKSIZE | |
5996 | + | |
5997 | +#define OUT(x) do { err = (x); goto out; } while (0) | |
5998 | + | |
5999 | +static int restart_array (mddev_t *mddev) | |
6000 | +{ | |
6001 | + int err = 0; | |
6002 | + | |
6003 | + /* | |
6004 | + * Complain if it has no devices | |
6005 | + */ | |
6006 | + if (!mddev->nb_dev) | |
6007 | + OUT(-ENXIO); | |
6008 | + | |
6009 | + if (mddev->pers) { | |
6010 | + if (!mddev->ro) | |
6011 | + OUT(-EBUSY); | |
6012 | + | |
6013 | + mddev->ro = 0; | |
6014 | + set_device_ro(mddev_to_kdev(mddev), 0); | |
6015 | + | |
6016 | + printk (KERN_INFO | |
6017 | + "md%d switched to read-write mode.\n", mdidx(mddev)); | |
6018 | + /* | |
6019 | + * Kick recovery or resync if necessary | |
6020 | + */ | |
6021 | + md_recover_arrays(); | |
6022 | + if (mddev->pers->restart_resync) | |
6023 | + mddev->pers->restart_resync(mddev); | |
6024 | + } else | |
6025 | + err = -EINVAL; | |
6026 | + | |
6027 | +out: | |
6028 | + return err; | |
6029 | +} | |
6030 | + | |
6031 | +#define STILL_MOUNTED KERN_WARNING \ | |
6032 | +"md: md%d still mounted.\n" | |
6033 | + | |
6034 | +static int do_md_stop (mddev_t * mddev, int ro) | |
6035 | +{ | |
6036 | + int err = 0, resync_interrupted = 0; | |
6037 | + kdev_t dev = mddev_to_kdev(mddev); | |
6038 | + | |
6039 | + if (!ro && !fs_may_mount (dev)) { | |
6040 | + printk (STILL_MOUNTED, mdidx(mddev)); | |
6041 | + OUT(-EBUSY); | |
6042 | + } | |
6043 | + | |
6044 | + /* | |
6045 | + * complain if it's already stopped | |
6046 | + */ | |
6047 | + if (!mddev->nb_dev) | |
6048 | + OUT(-ENXIO); | |
6049 | + | |
6050 | + if (mddev->pers) { | |
6051 | + /* | |
6052 | + * It is safe to call stop here, it only frees private | |
6053 | + * data. Also, it tells us if a device is unstoppable | |
6054 | + * (eg. resyncing is in progress) | |
6055 | + */ | |
6056 | + if (mddev->pers->stop_resync) | |
6057 | + if (mddev->pers->stop_resync(mddev)) | |
6058 | + resync_interrupted = 1; | |
6059 | + | |
6060 | + if (mddev->recovery_running) | |
6061 | + md_interrupt_thread(md_recovery_thread); | |
6062 | + | |
6063 | + /* | |
6064 | + * This synchronizes with signal delivery to the | |
6065 | + * resync or reconstruction thread. It also nicely | |
6066 | + * hangs the process if some reconstruction has not | |
6067 | + * finished. | |
6068 | + */ | |
6069 | + down(&mddev->recovery_sem); | |
6070 | + up(&mddev->recovery_sem); | |
6071 | + | |
6072 | + /* | |
6073 | + * sync and invalidate buffers because we cannot kill the | |
6074 | + * main thread with valid IO transfers still around. | |
6075 | + * the kernel lock protects us from new requests being | |
6076 | + * added after invalidate_buffers(). | |
6077 | + */ | |
6078 | + fsync_dev (mddev_to_kdev(mddev)); | |
6079 | + fsync_dev (dev); | |
6080 | + invalidate_buffers (dev); | |
6081 | + | |
6082 | + if (ro) { | |
6083 | + if (mddev->ro) | |
6084 | + OUT(-ENXIO); | |
6085 | + mddev->ro = 1; | |
6086 | + } else { | |
6087 | + if (mddev->ro) | |
6088 | + set_device_ro(dev, 0); | |
6089 | + if (mddev->pers->stop(mddev)) { | |
6090 | + if (mddev->ro) | |
6091 | + set_device_ro(dev, 1); | |
6092 | + OUT(-EBUSY); | |
6093 | + } | |
6094 | + if (mddev->ro) | |
6095 | + mddev->ro = 0; | |
6096 | + } | |
6097 | + if (mddev->sb) { | |
6098 | + /* | |
6099 | + * mark it clean only if the resync was not | 
6100 | + * interrupted. | 
6101 | + */ | |
6102 | + if (!mddev->recovery_running && !resync_interrupted) { | |
6103 | + printk("marking sb clean...\n"); | |
6104 | + mddev->sb->state |= 1 << MD_SB_CLEAN; | |
6105 | + } | |
6106 | + md_update_sb(mddev); | |
6107 | + } | |
6108 | + if (ro) | |
6109 | + set_device_ro(dev, 1); | |
6110 | + } | |
6111 | + | |
6112 | + /* | |
6113 | + * Free resources if final stop | |
6114 | + */ | |
6115 | + if (!ro) { | |
6116 | + export_array(mddev); | |
6117 | + md_size[mdidx(mddev)] = 0; | |
6118 | + md_hd_struct[mdidx(mddev)].nr_sects = 0; | |
6119 | + free_mddev(mddev); | |
6120 | + | |
6121 | + printk (KERN_INFO "md%d stopped.\n", mdidx(mddev)); | |
6122 | + } else | |
6123 | + printk (KERN_INFO | |
6124 | + "md%d switched to read-only mode.\n", mdidx(mddev)); | |
6125 | +out: | |
6126 | + return err; | |
6127 | +} | |
6128 | + | |
6129 | +#undef OUT | |
6130 | + | |
6131 | +/* | |
6132 | + * We have to safely support old arrays too. | |
6133 | + */ | |
6134 | +int detect_old_array (mdp_super_t *sb) | |
6135 | +{ | |
6136 | + if (sb->major_version > 0) | |
6137 | + return 0; | |
6138 | + if (sb->minor_version >= 90) | |
6139 | + return 0; | |
6140 | + | |
6141 | + return -EINVAL; | |
6142 | +} | |
6143 | + | |
6144 | + | |
6145 | +static void autorun_array (mddev_t *mddev) | |
6146 | +{ | |
6147 | + mdk_rdev_t *rdev; | |
6148 | + struct md_list_head *tmp; | |
6149 | + int err; | |
6150 | + | |
6151 | + if (mddev->disks.prev == &mddev->disks) { | |
6152 | + MD_BUG(); | |
6153 | + return; | |
6154 | + } | |
6155 | + | |
6156 | + printk("running: "); | |
6157 | + | |
6158 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
6159 | + printk("<%s>", partition_name(rdev->dev)); | |
6160 | + } | |
6161 | + printk("\nnow!\n"); | |
6162 | + | |
6163 | + err = do_md_run (mddev); | |
6164 | + if (err) { | |
6165 | + printk("do_md_run() returned %d\n", err); | |
6166 | + /* | |
6167 | + * prevent the writeback of an unrunnable array | |
6168 | + */ | |
6169 | + mddev->sb_dirty = 0; | |
6170 | + do_md_stop (mddev, 0); | |
6171 | + } | |
6172 | +} | |
6173 | + | |
6174 | +/* | |
6175 | + * let's try to run arrays based on all disks that have arrived | 
6176 | + * until now. (those are in the ->pending list) | |
6177 | + * | |
6178 | + * the method: pick the first pending disk, collect all disks with | |
6179 | + * the same UUID, remove all from the pending list and put them into | |
6180 | + * the 'same_array' list. Then order this list based on superblock | |
6181 | + * update time (freshest comes first), kick out 'old' disks and | |
6182 | + * compare superblocks. If everything's fine then run it. | |
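| + * (the freshness ordering and the kicking of stale disks happen in | 
| + * analyze_sbs(), reached via do_md_run() below) | 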
6183 | + */ | |
6184 | +static void autorun_devices (void) | |
6185 | +{ | |
6186 | + struct md_list_head candidates; | |
6187 | + struct md_list_head *tmp; | |
6188 | + mdk_rdev_t *rdev0, *rdev; | |
6189 | + mddev_t *mddev; | |
6190 | + kdev_t md_kdev; | |
6191 | + | |
6192 | + | |
6193 | + printk("autorun ...\n"); | |
6194 | + while (pending_raid_disks.next != &pending_raid_disks) { | |
6195 | + rdev0 = md_list_entry(pending_raid_disks.next, | |
6196 | + mdk_rdev_t, pending); | |
6197 | + | |
6198 | + printk("considering %s ...\n", partition_name(rdev0->dev)); | |
6199 | + MD_INIT_LIST_HEAD(&candidates); | |
6200 | + ITERATE_RDEV_PENDING(rdev,tmp) { | |
6201 | + if (uuid_equal(rdev0, rdev)) { | |
6202 | + if (!sb_equal(rdev0->sb, rdev->sb)) { | |
6203 | + printk("%s has same UUID as %s, but superblocks differ ...\n", partition_name(rdev->dev), partition_name(rdev0->dev)); | |
6204 | + continue; | |
6205 | + } | |
6206 | + printk(" adding %s ...\n", partition_name(rdev->dev)); | |
6207 | + md_list_del(&rdev->pending); | |
6208 | + md_list_add(&rdev->pending, &candidates); | |
6209 | + } | |
6210 | + } | |
6211 | /* | |
6212 | - * Check the superblock for consistency. | |
6213 | - * The personality itself has to check whether it's getting | |
6214 | - * added with the proper flags. The personality has to be | |
6215 | - * checked too. ;) | |
6216 | + * now we have a set of devices, with all of them having | |
6217 | + * mostly sane superblocks. It's time to allocate the | |
6218 | + * mddev. | |
6219 | */ | |
6220 | - if (analyze_one_sb (realdev)) | |
6221 | + md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor); | |
6222 | + mddev = kdev_to_mddev(md_kdev); | |
6223 | + if (mddev) { | |
6224 | + printk("md%d already running, cannot run %s\n", | |
6225 | + mdidx(mddev), partition_name(rdev0->dev)); | |
6226 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) | |
6227 | + export_rdev(rdev); | |
6228 | + continue; | |
6229 | + } | |
6230 | + mddev = alloc_mddev(md_kdev); | |
6231 | + printk("created md%d\n", mdidx(mddev)); | |
6232 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { | |
6233 | + bind_rdev_to_array(rdev, mddev); | |
6234 | + md_list_del(&rdev->pending); | |
6235 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
6236 | + } | |
6237 | + autorun_array(mddev); | |
6238 | + } | |
6239 | + printk("... autorun DONE.\n"); | |
6240 | +} | |
6241 | + | |
6242 | +/* | |
6243 | + * import RAID devices based on one partition | |
6244 | + * if possible, the array gets run as well. | |
6245 | + */ | |
6246 | + | |
6247 | +#define BAD_VERSION KERN_ERR \ | |
6248 | +"md: %s has RAID superblock version 0.%d, autodetect needs v0.90 or higher\n" | |
6249 | + | |
6250 | +#define OUT_OF_MEM KERN_ALERT \ | |
6251 | +"md: out of memory.\n" | |
6252 | + | |
6253 | +#define NO_DEVICE KERN_ERR \ | |
6254 | +"md: disabled device %s\n" | |
6255 | + | |
6256 | +#define AUTOADD_FAILED KERN_ERR \ | |
6257 | +"md: auto-adding devices to md%d FAILED (error %d).\n" | |
6258 | + | |
6259 | +#define AUTOADD_FAILED_USED KERN_ERR \ | |
6260 | +"md: cannot auto-add device %s to md%d, already used.\n" | |
6261 | + | |
6262 | +#define AUTORUN_FAILED KERN_ERR \ | |
6263 | +"md: auto-running md%d FAILED (error %d).\n" | |
6264 | + | |
6265 | +#define MDDEV_BUSY KERN_ERR \ | |
6266 | +"md: cannot auto-add to md%d, already running.\n" | |
6267 | + | |
6268 | +#define AUTOADDING KERN_INFO \ | |
6269 | +"md: auto-adding devices to md%d, based on %s's superblock.\n" | |
6270 | + | |
6271 | +#define AUTORUNNING KERN_INFO \ | |
6272 | +"md: auto-running md%d.\n" | |
6273 | + | |
6274 | +static int autostart_array (kdev_t startdev) | |
6275 | +{ | |
6276 | + int err = -EINVAL, i; | |
6277 | + mdp_super_t *sb = NULL; | |
6278 | + mdk_rdev_t *start_rdev = NULL, *rdev; | |
6279 | + | |
6280 | + if (md_import_device(startdev, 1)) { | |
6281 | + printk("could not import %s!\n", partition_name(startdev)); | |
6282 | + goto abort; | |
6283 | + } | |
6284 | + | |
6285 | + start_rdev = find_rdev_all(startdev); | |
6286 | + if (!start_rdev) { | |
6287 | + MD_BUG(); | |
6288 | + goto abort; | |
6289 | + } | |
6290 | + if (start_rdev->faulty) { | |
6291 | + printk("can not autostart based on faulty %s!\n", | |
6292 | + partition_name(startdev)); | |
6293 | + goto abort; | |
6294 | + } | |
6295 | + md_list_add(&start_rdev->pending, &pending_raid_disks); | |
6296 | + | |
6297 | + sb = start_rdev->sb; | |
6298 | + | |
6299 | + err = detect_old_array(sb); | |
6300 | + if (err) { | |
6301 | + printk("array version is too old to be autostarted, use raidtools 0.90 mkraid --upgrade\nto upgrade the array without data loss!\n"); | |
6302 | + goto abort; | |
6303 | + } | |
6304 | + | |
6305 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
6306 | + mdp_disk_t *desc; | |
6307 | + kdev_t dev; | |
6308 | + | |
6309 | + desc = sb->disks + i; | |
6310 | + dev = MKDEV(desc->major, desc->minor); | |
6311 | + | |
6312 | + if (dev == MKDEV(0,0)) | |
6313 | + continue; | |
6314 | + if (dev == startdev) | |
6315 | + continue; | |
6316 | + if (md_import_device(dev, 1)) { | |
6317 | + printk("could not import %s, trying to run array nevertheless.\n", partition_name(dev)); | |
6318 | + continue; | |
6319 | + } | |
6320 | + rdev = find_rdev_all(dev); | |
6321 | + if (!rdev) { | |
6322 | + MD_BUG(); | |
6323 | + goto abort; | |
6324 | + } | |
6325 | + md_list_add(&rdev->pending, &pending_raid_disks); | |
6326 | + } | |
6327 | + | |
6328 | + /* | |
6329 | + * possibly return codes | |
6330 | + */ | |
6331 | + autorun_devices(); | |
6332 | + return 0; | |
6333 | + | |
6334 | +abort: | |
6335 | + if (start_rdev) | |
6336 | + export_rdev(start_rdev); | |
6337 | + return err; | |
6338 | +} | |
6339 | + | |
6340 | +#undef BAD_VERSION | |
6341 | +#undef OUT_OF_MEM | |
6342 | +#undef NO_DEVICE | |
6343 | +#undef AUTOADD_FAILED_USED | |
6344 | +#undef AUTOADD_FAILED | |
6345 | +#undef AUTORUN_FAILED | |
6346 | +#undef AUTOADDING | |
6347 | +#undef AUTORUNNING | |
6348 | + | |
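| +/* filled in by the "raid=" boot option, e.g. "raid=noautodetect" */ | 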
6349 | +struct { | |
6350 | + int set; | |
6351 | + int noautodetect; | |
6352 | + | |
6353 | +} raid_setup_args md__initdata = { 0, 0 }; | |
6354 | + | |
6355 | +/* | |
6356 | + * Searches all registered partitions for autorun RAID arrays | |
6357 | + * at boot time. | |
6358 | + */ | |
6359 | +md__initfunc(void autodetect_raid(void)) | |
6360 | +{ | |
6361 | +#ifdef CONFIG_AUTODETECT_RAID | |
6362 | + struct gendisk *disk; | |
6363 | + mdk_rdev_t *rdev; | |
6364 | + int i; | |
6365 | + | |
6366 | + if (raid_setup_args.noautodetect) { | |
6367 | + printk(KERN_INFO "skipping autodetection of RAID arrays\n"); | |
6368 | + return; | |
6369 | + } | |
6370 | + printk(KERN_INFO "autodetecting RAID arrays\n"); | |
6371 | + | |
6372 | + for (disk = gendisk_head ; disk ; disk = disk->next) { | |
6373 | + for (i = 0; i < disk->max_p*disk->max_nr; i++) { | |
6374 | + kdev_t dev = MKDEV(disk->major,i); | |
6375 | + | |
6376 | + if (disk->part[i].type == LINUX_OLD_RAID_PARTITION) { | |
6377 | + printk(KERN_ALERT | |
6378 | +"md: %s's partition type has to be changed from type 0x86 to type 0xfd\n" | |
6379 | +" to maintain interoperability with other OSs! Autodetection support for\n" | |
6380 | +" type 0x86 will be deleted after some migration timeout. Sorry.\n", | |
6381 | + partition_name(dev)); | |
6382 | + disk->part[i].type = LINUX_RAID_PARTITION; | |
6383 | + } | |
6384 | + if (disk->part[i].type != LINUX_RAID_PARTITION) | |
6385 | + continue; | |
6386 | + | |
6387 | + if (md_import_device(dev,1)) { | |
6388 | + printk(KERN_ALERT "could not import %s!\n", | |
6389 | + partition_name(dev)); | |
6390 | + continue; | |
6391 | + } | |
6392 | + /* | |
6393 | + * Sanity checks: | |
6394 | + */ | |
6395 | + rdev = find_rdev_all(dev); | |
6396 | + if (!rdev) { | |
6397 | + MD_BUG(); | |
6398 | + continue; | |
6399 | + } | |
6400 | + if (rdev->faulty) { | |
6401 | + MD_BUG(); | |
6402 | + continue; | |
6403 | + } | |
6404 | + md_list_add(&rdev->pending, &pending_raid_disks); | |
6405 | + } | |
6406 | + } | |
6407 | + | |
6408 | + autorun_devices(); | |
6409 | +#endif | |
6410 | +} | |
6411 | + | |
6412 | +static int get_version (void * arg) | |
6413 | +{ | |
6414 | + mdu_version_t ver; | |
6415 | + | |
6416 | + ver.major = MD_MAJOR_VERSION; | |
6417 | + ver.minor = MD_MINOR_VERSION; | |
6418 | + ver.patchlevel = MD_PATCHLEVEL_VERSION; | |
6419 | + | |
6420 | + if (md_copy_to_user(arg, &ver, sizeof(ver))) | |
6421 | + return -EFAULT; | |
6422 | + | |
6423 | + return 0; | |
6424 | +} | |
6425 | + | |
6426 | +#define SET_FROM_SB(x) info.x = mddev->sb->x | |
6427 | +static int get_array_info (mddev_t * mddev, void * arg) | |
6428 | +{ | |
6429 | + mdu_array_info_t info; | |
6430 | + | |
6431 | + if (!mddev->sb) | |
6432 | + return -EINVAL; | |
6433 | + | |
6434 | + SET_FROM_SB(major_version); | |
6435 | + SET_FROM_SB(minor_version); | |
6436 | + SET_FROM_SB(patch_version); | |
6437 | + SET_FROM_SB(ctime); | |
6438 | + SET_FROM_SB(level); | |
6439 | + SET_FROM_SB(size); | |
6440 | + SET_FROM_SB(nr_disks); | |
6441 | + SET_FROM_SB(raid_disks); | |
6442 | + SET_FROM_SB(md_minor); | |
6443 | + SET_FROM_SB(not_persistent); | |
6444 | + | |
6445 | + SET_FROM_SB(utime); | |
6446 | + SET_FROM_SB(state); | |
6447 | + SET_FROM_SB(active_disks); | |
6448 | + SET_FROM_SB(working_disks); | |
6449 | + SET_FROM_SB(failed_disks); | |
6450 | + SET_FROM_SB(spare_disks); | |
6451 | + | |
6452 | + SET_FROM_SB(layout); | |
6453 | + SET_FROM_SB(chunk_size); | |
6454 | + | |
6455 | + if (md_copy_to_user(arg, &info, sizeof(info))) | |
6456 | + return -EFAULT; | |
6457 | + | |
6458 | + return 0; | |
6459 | +} | |
6460 | +#undef SET_FROM_SB | |
6461 | + | |
6462 | +#define SET_FROM_SB(x) info.x = mddev->sb->disks[nr].x | |
6463 | +static int get_disk_info (mddev_t * mddev, void * arg) | |
6464 | +{ | |
6465 | + mdu_disk_info_t info; | |
6466 | + unsigned int nr; | |
6467 | + | |
6468 | + if (!mddev->sb) | |
6469 | + return -EINVAL; | |
6470 | + | |
6471 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
6472 | + return -EFAULT; | |
6473 | + | |
6474 | + nr = info.number; | |
6475 | + if (nr >= mddev->sb->nr_disks) | |
6476 | + return -EINVAL; | |
6477 | + | |
6478 | + SET_FROM_SB(major); | |
6479 | + SET_FROM_SB(minor); | |
6480 | + SET_FROM_SB(raid_disk); | |
6481 | + SET_FROM_SB(state); | |
6482 | + | |
6483 | + if (md_copy_to_user(arg, &info, sizeof(info))) | |
6484 | + return -EFAULT; | |
6485 | + | |
6486 | + return 0; | |
6487 | +} | |
6488 | +#undef SET_FROM_SB | |
6489 | + | |
6490 | +#define SET_SB(x) mddev->sb->disks[nr].x = info.x | |
6491 | + | |
6492 | +static int add_new_disk (mddev_t * mddev, void * arg) | |
6493 | +{ | |
6494 | + int err, size, persistent; | |
6495 | + mdu_disk_info_t info; | |
6496 | + mdk_rdev_t *rdev; | |
6497 | + unsigned int nr; | |
6498 | + kdev_t dev; | |
6499 | + | |
6500 | + if (!mddev->sb) | |
6501 | + return -EINVAL; | |
6502 | + | |
6503 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
6504 | + return -EFAULT; | |
6505 | + | |
6506 | + nr = info.number; | |
6507 | + if (nr >= mddev->sb->nr_disks) | |
6508 | + return -EINVAL; | |
6509 | + | |
6510 | + dev = MKDEV(info.major,info.minor); | |
6511 | + | |
6512 | + if (find_rdev_all(dev)) { | |
6513 | + printk("device %s already used in a RAID array!\n", | |
6514 | + partition_name(dev)); | |
6515 | + return -EBUSY; | |
6516 | + } | |
6517 | + | |
6518 | + SET_SB(number); | |
6519 | + SET_SB(major); | |
6520 | + SET_SB(minor); | |
6521 | + SET_SB(raid_disk); | |
6522 | + SET_SB(state); | |
6523 | + | |
6524 | + if ((info.state & (1<<MD_DISK_FAULTY))==0) { | |
6525 | + err = md_import_device (dev, 0); | |
6526 | + if (err) { | |
6527 | + printk("md: error, md_import_device() returned %d\n", err); | |
6528 | + return -EINVAL; | |
6529 | + } | |
6530 | + rdev = find_rdev_all(dev); | |
6531 | + if (!rdev) { | |
6532 | + MD_BUG(); | |
6533 | return -EINVAL; | |
6534 | + } | |
6535 | + | |
6536 | + rdev->old_dev = dev; | |
6537 | + rdev->desc_nr = info.number; | |
6538 | + | |
6539 | + bind_rdev_to_array(rdev, mddev); | |
6540 | + | |
6541 | + persistent = !mddev->sb->not_persistent; | |
6542 | + if (!persistent) | |
6543 | + printk("nonpersistent superblock ...\n"); | |
6544 | + if (!mddev->sb->chunk_size) | |
6545 | + printk("no chunksize?\n"); | |
6546 | + | |
6547 | + size = calc_dev_size(dev, mddev, persistent); | |
6548 | + rdev->sb_offset = calc_dev_sboffset(dev, mddev, persistent); | |
6549 | + | |
6550 | + if (!mddev->sb->size || (mddev->sb->size > size)) | |
6551 | + mddev->sb->size = size; | |
6552 | + } | |
6553 | + | |
6554 | + /* | |
6555 | + * sync all other superblocks with the main superblock | |
6556 | + */ | |
6557 | + sync_sbs(mddev); | |
6558 | + | |
6559 | + return 0; | |
6560 | +} | |
6561 | +#undef SET_SB | |
6562 | + | |
6563 | +static int hot_remove_disk (mddev_t * mddev, kdev_t dev) | |
6564 | +{ | |
6565 | + int err; | |
6566 | + mdk_rdev_t *rdev; | |
6567 | + mdp_disk_t *disk; | |
6568 | + | |
6569 | + if (!mddev->pers) | |
6570 | + return -ENODEV; | |
6571 | + | |
6572 | + printk("trying to remove %s from md%d ... \n", | |
6573 | + partition_name(dev), mdidx(mddev)); | |
6574 | + | |
6575 | + if (!mddev->pers->diskop) { | |
6576 | + printk("md%d: personality does not support diskops!\n", | |
6577 | + mdidx(mddev)); | |
6578 | + return -EINVAL; | |
6579 | + } | |
6580 | + | |
6581 | + rdev = find_rdev(mddev, dev); | |
6582 | + if (!rdev) | |
6583 | + return -ENXIO; | |
6584 | + | |
6585 | + if (rdev->desc_nr == -1) { | |
6586 | + MD_BUG(); | |
6587 | + return -EINVAL; | |
6588 | + } | |
6589 | + disk = &mddev->sb->disks[rdev->desc_nr]; | |
6590 | + if (disk_active(disk)) | |
6591 | + goto busy; | |
6592 | + if (disk_removed(disk)) { | |
6593 | + MD_BUG(); | |
6594 | + return -EINVAL; | |
6595 | + } | |
6596 | + | |
6597 | + err = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK); | |
6598 | + if (err == -EBUSY) | |
6599 | + goto busy; | |
6600 | + if (err) { | |
6601 | + MD_BUG(); | |
6602 | + return -EINVAL; | |
6603 | + } | |
6604 | + | |
6605 | + remove_descriptor(disk, mddev->sb); | |
6606 | + kick_rdev_from_array(rdev); | |
6607 | + mddev->sb_dirty = 1; | |
6608 | + md_update_sb(mddev); | |
6609 | + | |
6610 | + return 0; | |
6611 | +busy: | |
6612 | + printk("cannot remove active disk %s from md%d ... \n", | |
6613 | + partition_name(dev), mdidx(mddev)); | |
6614 | + return -EBUSY; | |
6615 | +} | |
6616 | + | |
6617 | +static int hot_add_disk (mddev_t * mddev, kdev_t dev) | |
6618 | +{ | |
6619 | + int i, err, persistent; | |
6620 | + unsigned int size; | |
6621 | + mdk_rdev_t *rdev; | |
6622 | + mdp_disk_t *disk; | |
6623 | + | |
6624 | + if (!mddev->pers) | |
6625 | + return -ENODEV; | |
6626 | + | |
6627 | + printk("trying to hot-add %s to md%d ... \n", | |
6628 | + partition_name(dev), mdidx(mddev)); | |
6629 | + | |
6630 | + if (!mddev->pers->diskop) { | |
6631 | + printk("md%d: personality does not support diskops!\n", | |
6632 | + mdidx(mddev)); | |
6633 | + return -EINVAL; | |
6634 | + } | |
6635 | + | |
6636 | + persistent = !mddev->sb->not_persistent; | |
6637 | + size = calc_dev_size(dev, mddev, persistent); | |
6638 | + | |
6639 | + if (size < mddev->sb->size) { | |
6640 | + printk("md%d: disk size %d blocks < array size %d\n", | |
6641 | + mdidx(mddev), size, mddev->sb->size); | |
6642 | + return -ENOSPC; | |
6643 | + } | |
6644 | + | |
6645 | + rdev = find_rdev(mddev, dev); | |
6646 | + if (rdev) | |
6647 | + return -EBUSY; | |
6648 | + | |
6649 | + err = md_import_device (dev, 0); | |
6650 | + if (err) { | |
6651 | + printk("md: error, md_import_device() returned %d\n", err); | |
6652 | + return -EINVAL; | |
6653 | + } | |
6654 | + rdev = find_rdev_all(dev); | |
6655 | + if (!rdev) { | |
6656 | + MD_BUG(); | |
6657 | + return -EINVAL; | |
6658 | + } | |
6659 | + if (rdev->faulty) { | |
6660 | + printk("md: can not hot-add faulty %s disk to md%d!\n", | |
6661 | + partition_name(dev), mdidx(mddev)); | |
6662 | + err = -EINVAL; | |
6663 | + goto abort_export; | |
6664 | + } | |
6665 | + bind_rdev_to_array(rdev, mddev); | |
6666 | + | |
6667 | + /* | |
6668 | + * The rest should better be atomic, we can have disk failures | |
6669 | + * noticed in interrupt contexts ... | |
6670 | + */ | |
6671 | + cli(); | |
6672 | + rdev->old_dev = dev; | |
6673 | + rdev->size = size; | |
6674 | + rdev->sb_offset = calc_dev_sboffset(dev, mddev, persistent); | |
6675 | + | |
6676 | + disk = mddev->sb->disks + mddev->sb->raid_disks; | |
6677 | + for (i = mddev->sb->raid_disks; i < MD_SB_DISKS; i++) { | |
6678 | + disk = mddev->sb->disks + i; | |
6679 | + | |
6680 | + if (!disk->major && !disk->minor) | |
6681 | + break; | |
6682 | + if (disk_removed(disk)) | |
6683 | + break; | |
6684 | + } | |
6685 | + if (i == MD_SB_DISKS) { | |
6686 | + sti(); | |
6687 | + printk("md%d: can not hot-add to full array!\n", mdidx(mddev)); | |
6688 | + err = -EBUSY; | |
6689 | + goto abort_unbind_export; | |
6690 | + } | |
6691 | + | |
6692 | + if (disk_removed(disk)) { | |
6693 | /* | |
6694 | - * hot_add has to bump up nb_dev itself | |
6695 | + * reuse slot | |
6696 | */ | |
6697 | - if (md_dev[minor].pers->hot_add_disk (&md_dev[minor], dev)) { | |
6698 | - /* | |
6699 | - * FIXME: here we should free up the inode and stuff | |
6700 | - */ | |
6701 | - printk ("FIXME\n"); | |
6702 | - return -EINVAL; | |
6703 | + if (disk->number != i) { | |
6704 | + sti(); | |
6705 | + MD_BUG(); | |
6706 | + err = -EINVAL; | |
6707 | + goto abort_unbind_export; | |
6708 | } | |
6709 | - } else | |
6710 | - md_dev[minor].nb_dev++; | |
6711 | + } else { | |
6712 | + disk->number = i; | |
6713 | + } | |
6714 | ||
6715 | - printk ("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor); | |
6716 | - return (0); | |
6717 | + disk->raid_disk = disk->number; | |
6718 | + disk->major = MAJOR(dev); | |
6719 | + disk->minor = MINOR(dev); | |
6720 | + | |
6721 | + if (mddev->pers->diskop(mddev, &disk, DISKOP_HOT_ADD_DISK)) { | |
6722 | + sti(); | |
6723 | + MD_BUG(); | |
6724 | + err = -EINVAL; | |
6725 | + goto abort_unbind_export; | |
6726 | + } | |
6727 | + | |
6728 | + mark_disk_spare(disk); | |
6729 | + mddev->sb->nr_disks++; | |
6730 | + mddev->sb->spare_disks++; | |
6731 | + mddev->sb->working_disks++; | |
6732 | + | |
6733 | + mddev->sb_dirty = 1; | |
6734 | + | |
6735 | + sti(); | |
6736 | + md_update_sb(mddev); | |
6737 | + | |
6738 | + /* | |
6739 | + * Kick recovery, maybe this spare has to be added to the | |
6740 | + * array immediately. | |
6741 | + */ | |
6742 | + md_recover_arrays(); | |
6743 | + | |
6744 | + return 0; | |
6745 | + | |
6746 | +abort_unbind_export: | |
6747 | + unbind_rdev_from_array(rdev); | |
6748 | + | |
6749 | +abort_export: | |
6750 | + export_rdev(rdev); | |
6751 | + return err; | |
6752 | +} | |
6753 | + | |
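hot_add_disk() and hot_remove_disk() take the component device packed directly into the ioctl argument rather than through a structure, as the (kdev_t)arg casts in md_ioctl() below show. A minimal user-space sketch of the hot-add path; the <linux/raid/md_u.h> location of HOT_ADD_DISK, the old 8-bit major / 8-bit minor kdev_t packing, and the /dev/md0 and 8,17 device numbers are all assumptions made for illustration:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/raid/md_u.h>    /* assumed home of HOT_ADD_DISK */

    /* add the disk with major 8, minor 17 to /dev/md0 as a spare */
    int md_hot_add(void)
    {
            int fd = open("/dev/md0", O_RDONLY);

            if (fd < 0)
                    return -1;
            /* the kernel casts the argument straight to kdev_t: (major << 8) | minor */
            return ioctl(fd, HOT_ADD_DISK, (unsigned long)((8 << 8) | 17));
    }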
6754 | +#define SET_SB(x) mddev->sb->x = info.x | |
6755 | +static int set_array_info (mddev_t * mddev, void * arg) | |
6756 | +{ | |
6757 | + mdu_array_info_t info; | |
6758 | + | |
6759 | + if (mddev->sb) { | |
6760 | + printk("array md%d already has a superblock!\n", | |
6761 | + mdidx(mddev)); | |
6762 | + return -EBUSY; | |
6763 | + } | |
6764 | + | |
6765 | + if (md_copy_from_user(&info, arg, sizeof(info))) | |
6766 | + return -EFAULT; | |
6767 | + | |
6768 | + if (alloc_array_sb(mddev)) | |
6769 | + return -ENOMEM; | |
6770 | + | |
6771 | + mddev->sb->major_version = MD_MAJOR_VERSION; | |
6772 | + mddev->sb->minor_version = MD_MINOR_VERSION; | |
6773 | + mddev->sb->patch_version = MD_PATCHLEVEL_VERSION; | |
6774 | + mddev->sb->ctime = CURRENT_TIME; | |
6775 | + | |
6776 | + SET_SB(level); | |
6777 | + SET_SB(size); | |
6778 | + SET_SB(nr_disks); | |
6779 | + SET_SB(raid_disks); | |
6780 | + SET_SB(md_minor); | |
6781 | + SET_SB(not_persistent); | |
6782 | + | |
6783 | + SET_SB(state); | |
6784 | + SET_SB(active_disks); | |
6785 | + SET_SB(working_disks); | |
6786 | + SET_SB(failed_disks); | |
6787 | + SET_SB(spare_disks); | |
6788 | + | |
6789 | + SET_SB(layout); | |
6790 | + SET_SB(chunk_size); | |
6791 | + | |
6792 | + mddev->sb->md_magic = MD_SB_MAGIC; | |
6793 | + | |
6794 | + /* | |
6795 | + * Generate a 128 bit UUID | |
6796 | + */ | |
6797 | + get_random_bytes(&mddev->sb->set_uuid0, 4); | |
6798 | + get_random_bytes(&mddev->sb->set_uuid1, 4); | |
6799 | + get_random_bytes(&mddev->sb->set_uuid2, 4); | |
6800 | + get_random_bytes(&mddev->sb->set_uuid3, 4); | |
6801 | + | |
6802 | + return 0; | |
6803 | +} | |
6804 | +#undef SET_SB | |
6805 | + | |
6806 | +static int set_disk_info (mddev_t * mddev, void * arg) | |
6807 | +{ | |
6808 | + printk("not yet"); | |
6809 | + return -EINVAL; | |
6810 | +} | |
6811 | + | |
6812 | +static int clear_array (mddev_t * mddev) | |
6813 | +{ | |
6814 | + printk("not yet"); | |
6815 | + return -EINVAL; | |
6816 | +} | |
6817 | + | |
6818 | +static int write_raid_info (mddev_t * mddev) | |
6819 | +{ | |
6820 | + printk("not yet"); | |
6821 | + return -EINVAL; | |
6822 | +} | |
6823 | + | |
6824 | +static int protect_array (mddev_t * mddev) | |
6825 | +{ | |
6826 | + printk("not yet"); | |
6827 | + return -EINVAL; | |
6828 | +} | |
6829 | + | |
6830 | +static int unprotect_array (mddev_t * mddev) | |
6831 | +{ | |
6832 | + printk("not yet"); | |
6833 | + return -EINVAL; | |
6834 | +} | |
6835 | + | |
6836 | +static int set_disk_faulty (mddev_t *mddev, kdev_t dev) | |
6837 | +{ | |
6838 | + int ret; | |
6839 | + | |
6840 | + fsync_dev(mddev_to_kdev(mddev)); | |
6841 | + ret = md_error(mddev_to_kdev(mddev), dev); | |
6842 | + return ret; | |
6843 | } | |
6844 | ||
6845 | static int md_ioctl (struct inode *inode, struct file *file, | |
6846 | unsigned int cmd, unsigned long arg) | |
6847 | { | |
6848 | - int minor, err; | |
6849 | - struct hd_geometry *loc = (struct hd_geometry *) arg; | |
6850 | + unsigned int minor; | |
6851 | + int err = 0; | |
6852 | + struct hd_geometry *loc = (struct hd_geometry *) arg; | |
6853 | + mddev_t *mddev = NULL; | |
6854 | + kdev_t dev; | |
6855 | ||
6856 | - if (!capable(CAP_SYS_ADMIN)) | |
6857 | - return -EACCES; | |
6858 | + if (!md_capable_admin()) | |
6859 | + return -EACCES; | |
6860 | ||
6861 | - if (((minor=MINOR(inode->i_rdev)) & 0x80) && | |
6862 | - (minor & 0x7f) < MAX_PERSONALITY && | |
6863 | - pers[minor & 0x7f] && | |
6864 | - pers[minor & 0x7f]->ioctl) | |
6865 | - return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg)); | |
6866 | - | |
6867 | - if (minor >= MAX_MD_DEV) | |
6868 | - return -EINVAL; | |
6869 | + dev = inode->i_rdev; | |
6870 | + minor = MINOR(dev); | |
6871 | + if (minor >= MAX_MD_DEVS) | |
6872 | + return -EINVAL; | |
6873 | ||
6874 | - switch (cmd) | |
6875 | - { | |
6876 | - case REGISTER_DEV: | |
6877 | - return do_md_add (minor, to_kdev_t ((dev_t) arg)); | |
6878 | + /* | |
6879 | + * Commands dealing with the RAID driver but not any | |
6880 | + * particular array: | |
6881 | + */ | |
6882 | + switch (cmd) | |
6883 | + { | |
6884 | + case RAID_VERSION: | |
6885 | + err = get_version((void *)arg); | |
6886 | + goto done; | |
6887 | + | |
6888 | + case PRINT_RAID_DEBUG: | |
6889 | + err = 0; | |
6890 | + md_print_devices(); | |
6891 | + goto done_unlock; | |
6892 | + | |
6893 | + case BLKGETSIZE: /* Return device size */ | |
6894 | + if (!arg) { | |
6895 | + err = -EINVAL; | |
6896 | + goto abort; | |
6897 | + } | |
6898 | + err = md_put_user(md_hd_struct[minor].nr_sects, | |
6899 | + (long *) arg); | |
6900 | + goto done; | |
6901 | ||
6902 | - case START_MD: | |
6903 | - return do_md_run (minor, (int) arg); | |
6904 | + case BLKFLSBUF: | |
6905 | + fsync_dev(dev); | |
6906 | + invalidate_buffers(dev); | |
6907 | + goto done; | |
6908 | ||
6909 | - case STOP_MD: | |
6910 | - return do_md_stop (minor, inode); | |
6911 | - | |
6912 | - case BLKGETSIZE: /* Return device size */ | |
6913 | - if (!arg) return -EINVAL; | |
6914 | - err = put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg); | |
6915 | - if (err) | |
6916 | - return err; | |
6917 | - break; | |
6918 | - | |
6919 | - case BLKFLSBUF: | |
6920 | - fsync_dev (inode->i_rdev); | |
6921 | - invalidate_buffers (inode->i_rdev); | |
6922 | - break; | |
6923 | - | |
6924 | - case BLKRASET: | |
6925 | - if (arg > 0xff) | |
6926 | - return -EINVAL; | |
6927 | - read_ahead[MAJOR(inode->i_rdev)] = arg; | |
6928 | - return 0; | |
6929 | - | |
6930 | - case BLKRAGET: | |
6931 | - if (!arg) return -EINVAL; | |
6932 | - err = put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg); | |
6933 | - if (err) | |
6934 | - return err; | |
6935 | - break; | |
6936 | - | |
6937 | - /* We have a problem here : there is no easy way to give a CHS | |
6938 | - virtual geometry. We currently pretend that we have a 2 heads | |
6939 | - 4 sectors (with a BIG number of cylinders...). This drives dosfs | |
6940 | - just mad... ;-) */ | |
6941 | - | |
6942 | - case HDIO_GETGEO: | |
6943 | - if (!loc) return -EINVAL; | |
6944 | - err = put_user (2, (char *) &loc->heads); | |
6945 | - if (err) | |
6946 | - return err; | |
6947 | - err = put_user (4, (char *) &loc->sectors); | |
6948 | - if (err) | |
6949 | - return err; | |
6950 | - err = put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders); | |
6951 | - if (err) | |
6952 | - return err; | |
6953 | - err = put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect, | |
6954 | - (long *) &loc->start); | |
6955 | - if (err) | |
6956 | - return err; | |
6957 | - break; | |
6958 | - | |
6959 | - RO_IOCTLS(inode->i_rdev,arg); | |
6960 | + case BLKRASET: | |
6961 | + if (arg > 0xff) { | |
6962 | + err = -EINVAL; | |
6963 | + goto abort; | |
6964 | + } | |
6965 | + read_ahead[MAJOR(dev)] = arg; | |
6966 | + goto done; | |
6967 | ||
6968 | - default: | |
6969 | - return -EINVAL; | |
6970 | - } | |
6971 | + case BLKRAGET: | |
6972 | + if (!arg) { | |
6973 | + err = -EINVAL; | |
6974 | + goto abort; | |
6975 | + } | |
6976 | + err = md_put_user (read_ahead[ | |
6977 | + MAJOR(dev)], (long *) arg); | |
6978 | + goto done; | |
6979 | + default: | |
6980 | + } | |
6981 | + | |
6982 | + /* | |
6983 | + * Commands creating/starting a new array: | |
6984 | + */ | |
6985 | + | |
6986 | + mddev = kdev_to_mddev(dev); | |
6987 | + | |
6988 | + switch (cmd) | |
6989 | + { | |
6990 | + case SET_ARRAY_INFO: | |
6991 | + case START_ARRAY: | |
6992 | + if (mddev) { | |
6993 | + printk("array md%d already exists!\n", | |
6994 | + mdidx(mddev)); | |
6995 | + err = -EEXIST; | |
6996 | + goto abort; | |
6997 | + } | |
6998 | + default: | |
6999 | + } | |
7000 | + | |
7001 | + switch (cmd) | |
7002 | + { | |
7003 | + case SET_ARRAY_INFO: | |
7004 | + mddev = alloc_mddev(dev); | |
7005 | + if (!mddev) { | |
7006 | + err = -ENOMEM; | |
7007 | + goto abort; | |
7008 | + } | |
7009 | + /* | |
7010 | + * alloc_mddev() should possibly self-lock. | |
7011 | + */ | |
7012 | + err = lock_mddev(mddev); | |
7013 | + if (err) { | |
7014 | + printk("ioctl, reason %d, cmd %d\n", err, cmd); | |
7015 | + goto abort; | |
7016 | + } | |
7017 | + err = set_array_info(mddev, (void *)arg); | |
7018 | + if (err) { | |
7019 | + printk("couldnt set array info. %d\n", err); | |
7020 | + goto abort; | |
7021 | + } | |
7022 | + goto done_unlock; | |
7023 | + | |
7024 | + case START_ARRAY: | |
7025 | + /* | |
7026 | + * possibly make it lock the array ... | |
7027 | + */ | |
7028 | + err = autostart_array((kdev_t)arg); | |
7029 | + if (err) { | |
7030 | + printk("autostart %s failed!\n", | |
7031 | + partition_name((kdev_t)arg)); | |
7032 | + goto abort; | |
7033 | + } | |
7034 | + goto done; | |
7035 | + | |
7036 | + default: | |
7037 | + } | |
7038 | + | |
7039 | + /* | |
7040 | + * Commands querying/configuring an existing array: | |
7041 | + */ | |
7042 | + | |
7043 | + if (!mddev) { | |
7044 | + err = -ENODEV; | |
7045 | + goto abort; | |
7046 | + } | |
7047 | + err = lock_mddev(mddev); | |
7048 | + if (err) { | |
7049 | + printk("ioctl lock interrupted, reason %d, cmd %d\n",err, cmd); | |
7050 | + goto abort; | |
7051 | + } | |
7052 | + | |
7053 | + /* | |
7054 | + * Commands even a read-only array can execute: | |
7055 | + */ | |
7056 | + switch (cmd) | |
7057 | + { | |
7058 | + case GET_ARRAY_INFO: | |
7059 | + err = get_array_info(mddev, (void *)arg); | |
7060 | + goto done_unlock; | |
7061 | + | |
7062 | + case GET_DISK_INFO: | |
7063 | + err = get_disk_info(mddev, (void *)arg); | |
7064 | + goto done_unlock; | |
7065 | + | |
7066 | + case RESTART_ARRAY_RW: | |
7067 | + err = restart_array(mddev); | |
7068 | + goto done_unlock; | |
7069 | + | |
7070 | + case STOP_ARRAY: | |
7071 | + err = do_md_stop (mddev, 0); | |
7072 | + goto done_unlock; | |
7073 | + | |
7074 | + case STOP_ARRAY_RO: | |
7075 | + err = do_md_stop (mddev, 1); | |
7076 | + goto done_unlock; | |
7077 | + | |
7078 | + /* | |
7079 | + * We have a problem here : there is no easy way to give a CHS | |
7080 | + * virtual geometry. We currently pretend that we have a 2 heads | |
7081 | + * 4 sectors (with a BIG number of cylinders...). This drives | |
7082 | + * dosfs just mad... ;-) | |
7083 | + */ | |
7084 | + case HDIO_GETGEO: | |
7085 | + if (!loc) { | |
7086 | + err = -EINVAL; | |
7087 | + goto abort_unlock; | |
7088 | + } | |
7089 | + err = md_put_user (2, (char *) &loc->heads); | |
7090 | + if (err) | |
7091 | + goto abort_unlock; | |
7092 | + err = md_put_user (4, (char *) &loc->sectors); | |
7093 | + if (err) | |
7094 | + goto abort_unlock; | |
7095 | + err = md_put_user (md_hd_struct[mdidx(mddev)].nr_sects/8, | |
7096 | + (short *) &loc->cylinders); | |
7097 | + if (err) | |
7098 | + goto abort_unlock; | |
7099 | + err = md_put_user (md_hd_struct[minor].start_sect, | |
7100 | + (long *) &loc->start); | |
7101 | + goto done_unlock; | |
7102 | + } | |
7103 | ||
7104 | - return (0); | |
7105 | + /* | |
7106 | + * The remaining ioctls are changing the state of the | |
7107 | + * superblock, so we do not allow read-only arrays | |
7108 | + * here: | |
7109 | + */ | |
7110 | + if (mddev->ro) { | |
7111 | + err = -EROFS; | |
7112 | + goto abort_unlock; | |
7113 | + } | |
7114 | + | |
7115 | + switch (cmd) | |
7116 | + { | |
7117 | + case CLEAR_ARRAY: | |
7118 | + err = clear_array(mddev); | |
7119 | + goto done_unlock; | |
7120 | + | |
7121 | + case ADD_NEW_DISK: | |
7122 | + err = add_new_disk(mddev, (void *)arg); | |
7123 | + goto done_unlock; | |
7124 | + | |
7125 | + case HOT_REMOVE_DISK: | |
7126 | + err = hot_remove_disk(mddev, (kdev_t)arg); | |
7127 | + goto done_unlock; | |
7128 | + | |
7129 | + case HOT_ADD_DISK: | |
7130 | + err = hot_add_disk(mddev, (kdev_t)arg); | |
7131 | + goto done_unlock; | |
7132 | + | |
7133 | + case SET_DISK_INFO: | |
7134 | + err = set_disk_info(mddev, (void *)arg); | |
7135 | + goto done_unlock; | |
7136 | + | |
7137 | + case WRITE_RAID_INFO: | |
7138 | + err = write_raid_info(mddev); | |
7139 | + goto done_unlock; | |
7140 | + | |
7141 | + case UNPROTECT_ARRAY: | |
7142 | + err = unprotect_array(mddev); | |
7143 | + goto done_unlock; | |
7144 | + | |
7145 | + case PROTECT_ARRAY: | |
7146 | + err = protect_array(mddev); | |
7147 | + goto done_unlock; | |
7148 | + | |
7149 | + case SET_DISK_FAULTY: | |
7150 | + err = set_disk_faulty(mddev, (kdev_t)arg); | |
7151 | + goto done_unlock; | |
7152 | + | |
7153 | + case RUN_ARRAY: | |
7154 | + { | |
7155 | + mdu_param_t param; | |
7156 | + | |
7157 | + err = md_copy_from_user(¶m, (mdu_param_t *)arg, | |
7158 | + sizeof(param)); | |
7159 | + if (err) | |
7160 | + goto abort_unlock; | |
7161 | + | |
7162 | + err = do_md_run (mddev); | |
7163 | + /* | |
7164 | + * we have to clean up the mess if | |
7165 | + * the array cannot be run for some | |
7166 | + * reason ... | |
7167 | + */ | |
7168 | + if (err) { | |
7169 | + mddev->sb_dirty = 0; | |
7170 | + do_md_stop (mddev, 0); | |
7171 | + } | |
7172 | + goto done_unlock; | |
7173 | + } | |
7174 | + | |
7175 | + default: | |
7176 | + printk(KERN_WARNING "%s(pid %d) used obsolete MD ioctl, upgrade your software to use new ioctls.\n", current->comm, current->pid); | |
7177 | + err = -EINVAL; | |
7178 | + goto abort_unlock; | |
7179 | + } | |
7180 | + | |
7181 | +done_unlock: | |
7182 | +abort_unlock: | |
7183 | + if (mddev) | |
7184 | + unlock_mddev(mddev); | |
7185 | + else | |
7186 | + printk("huh11?\n"); | |
7187 | + | |
7188 | + return err; | |
7189 | +done: | |
7190 | + if (err) | |
7191 | + printk("huh12?\n"); | |
7192 | +abort: | |
7193 | + return err; | |
7194 | } | |
7195 | ||
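The rewritten md_ioctl() above replaces the old REGISTER_DEV/START_MD/STOP_MD commands with the structure-based SET_ARRAY_INFO, GET_ARRAY_INFO, GET_DISK_INFO and friends, and routes every exit through the done/abort and done_unlock/abort_unlock labels so that a locked mddev is always released. A hedged user-space sketch of the read-only query path; the <linux/raid/md_u.h> include is an assumption, while the structure and field names come from the handlers earlier in this patch:

    #include <stdio.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/raid/md_u.h>    /* assumed location of mdu_array_info_t and GET_ARRAY_INFO */

    int main(void)
    {
            mdu_array_info_t info;
            int fd = open("/dev/md0", O_RDONLY);

            if (fd < 0)
                    return 1;
            if (ioctl(fd, GET_ARRAY_INFO, &info) != 0)
                    return 1;
            printf("level %d, %d raid disks, chunk size %d\n",
                   info.level, info.raid_disks, info.chunk_size);
            return 0;
    }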
7196 | + | |
7197 | +#if LINUX_VERSION_CODE < LinuxVersionCode(2,1,0) | |
7198 | + | |
7199 | static int md_open (struct inode *inode, struct file *file) | |
7200 | { | |
7201 | - int minor=MINOR(inode->i_rdev); | |
7202 | + /* | |
7203 | + * Always succeed | |
7204 | + */ | |
7205 | + return (0); | |
7206 | +} | |
7207 | + | |
7208 | +static void md_release (struct inode *inode, struct file *file) | |
7209 | +{ | |
7210 | + sync_dev(inode->i_rdev); | |
7211 | +} | |
7212 | + | |
7213 | + | |
7214 | +static int md_read (struct inode *inode, struct file *file, | |
7215 | + char *buf, int count) | |
7216 | +{ | |
7217 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
7218 | ||
7219 | - md_dev[minor].busy++; | |
7220 | - return (0); /* Always succeed */ | |
7221 | + if (!mddev || !mddev->pers) | |
7222 | + return -ENXIO; | |
7223 | + | |
7224 | + return block_read (inode, file, buf, count); | |
7225 | } | |
7226 | ||
7227 | +static int md_write (struct inode *inode, struct file *file, | |
7228 | + const char *buf, int count) | |
7229 | +{ | |
7230 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
7231 | + | |
7232 | + if (!mddev || !mddev->pers) | |
7233 | + return -ENXIO; | |
7234 | ||
7235 | -static int md_release (struct inode *inode, struct file *file) | |
7236 | + return block_write (inode, file, buf, count); | |
7237 | +} | |
7238 | + | |
7239 | +static struct file_operations md_fops= | |
7240 | { | |
7241 | - int minor=MINOR(inode->i_rdev); | |
7242 | + NULL, | |
7243 | + md_read, | |
7244 | + md_write, | |
7245 | + NULL, | |
7246 | + NULL, | |
7247 | + md_ioctl, | |
7248 | + NULL, | |
7249 | + md_open, | |
7250 | + md_release, | |
7251 | + block_fsync | |
7252 | +}; | |
7253 | + | |
7254 | +#else | |
7255 | ||
7256 | - sync_dev (inode->i_rdev); | |
7257 | - md_dev[minor].busy--; | |
7258 | - return 0; | |
7259 | +static int md_open (struct inode *inode, struct file *file) | |
7260 | +{ | |
7261 | + /* | |
7262 | + * Always succeed | |
7263 | + */ | |
7264 | + return (0); | |
7265 | } | |
7266 | ||
7267 | +static int md_release (struct inode *inode, struct file *file) | |
7268 | +{ | |
7269 | + sync_dev(inode->i_rdev); | |
7270 | + return 0; | |
7271 | +} | |
7272 | ||
7273 | static ssize_t md_read (struct file *file, char *buf, size_t count, | |
7274 | loff_t *ppos) | |
7275 | { | |
7276 | - int minor=MINOR(file->f_dentry->d_inode->i_rdev); | |
7277 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
7278 | ||
7279 | - if (!md_dev[minor].pers) /* Check if device is being run */ | |
7280 | - return -ENXIO; | |
7281 | + if (!mddev || !mddev->pers) | |
7282 | + return -ENXIO; | |
7283 | ||
7284 | - return block_read(file, buf, count, ppos); | |
7285 | + return block_read(file, buf, count, ppos); | |
7286 | } | |
7287 | ||
7288 | static ssize_t md_write (struct file *file, const char *buf, | |
7289 | size_t count, loff_t *ppos) | |
7290 | { | |
7291 | - int minor=MINOR(file->f_dentry->d_inode->i_rdev); | |
7292 | + mddev_t *mddev = kdev_to_mddev(MD_FILE_TO_INODE(file)->i_rdev); | |
7293 | ||
7294 | - if (!md_dev[minor].pers) /* Check if device is being run */ | |
7295 | - return -ENXIO; | |
7296 | + if (!mddev || !mddev->pers) | |
7297 | + return -ENXIO; | |
7298 | ||
7299 | - return block_write(file, buf, count, ppos); | |
7300 | + return block_write(file, buf, count, ppos); | |
7301 | } | |
7302 | ||
7303 | static struct file_operations md_fops= | |
7304 | { | |
7305 | - NULL, | |
7306 | - md_read, | |
7307 | - md_write, | |
7308 | - NULL, | |
7309 | - NULL, | |
7310 | - md_ioctl, | |
7311 | - NULL, | |
7312 | - md_open, | |
7313 | - NULL, | |
7314 | - md_release, | |
7315 | - block_fsync | |
7316 | + NULL, | |
7317 | + md_read, | |
7318 | + md_write, | |
7319 | + NULL, | |
7320 | + NULL, | |
7321 | + md_ioctl, | |
7322 | + NULL, | |
7323 | + md_open, | |
7324 | + NULL, | |
7325 | + md_release, | |
7326 | + block_fsync | |
7327 | }; | |
7328 | ||
7329 | -int md_map (int minor, kdev_t *rdev, unsigned long *rsector, unsigned long size) | |
7330 | +#endif | |
7331 | + | |
7332 | +int md_map (kdev_t dev, kdev_t *rdev, | |
7333 | + unsigned long *rsector, unsigned long size) | |
7334 | { | |
7335 | - if ((unsigned int) minor >= MAX_MD_DEV) | |
7336 | - { | |
7337 | - printk ("Bad md device %d\n", minor); | |
7338 | - return (-1); | |
7339 | - } | |
7340 | - | |
7341 | - if (!md_dev[minor].pers) | |
7342 | - { | |
7343 | - printk ("Oops ! md%d not running, giving up !\n", minor); | |
7344 | - return (-1); | |
7345 | - } | |
7346 | + int err; | |
7347 | + mddev_t *mddev = kdev_to_mddev(dev); | |
7348 | ||
7349 | - return (md_dev[minor].pers->map(md_dev+minor, rdev, rsector, size)); | |
7350 | + if (!mddev || !mddev->pers) { | |
7351 | + err = -ENXIO; | |
7352 | + goto out; | |
7353 | + } | |
7354 | + | |
7355 | + err = mddev->pers->map(mddev, dev, rdev, rsector, size); | |
7356 | +out: | |
7357 | + return err; | |
7358 | } | |
7359 | ||
7360 | -int md_make_request (int minor, int rw, struct buffer_head * bh) | |
7361 | +int md_make_request (struct buffer_head * bh, int rw) | |
7362 | { | |
7363 | - if (md_dev [minor].pers->make_request) { | |
7364 | - if (buffer_locked(bh)) | |
7365 | - return 0; | |
7366 | + int err; | |
7367 | + mddev_t *mddev = kdev_to_mddev(bh->b_dev); | |
7368 | + | |
7369 | + if (!mddev || !mddev->pers) { | |
7370 | + err = -ENXIO; | |
7371 | + goto out; | |
7372 | + } | |
7373 | + | |
7374 | + if (mddev->pers->make_request) { | |
7375 | + if (buffer_locked(bh)) { | |
7376 | + err = 0; | |
7377 | + goto out; | |
7378 | + } | |
7379 | set_bit(BH_Lock, &bh->b_state); | |
7380 | if (rw == WRITE || rw == WRITEA) { | |
7381 | if (!buffer_dirty(bh)) { | |
7382 | - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); | |
7383 | - return 0; | |
7384 | + bh->b_end_io(bh, buffer_uptodate(bh)); | |
7385 | + err = 0; | |
7386 | + goto out; | |
7387 | } | |
7388 | } | |
7389 | if (rw == READ || rw == READA) { | |
7390 | if (buffer_uptodate(bh)) { | |
7391 | - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); | |
7392 | - return 0; | |
7393 | + bh->b_end_io(bh, buffer_uptodate(bh)); | |
7394 | + err = 0; | |
7395 | + goto out; | |
7396 | } | |
7397 | } | |
7398 | - return (md_dev[minor].pers->make_request(md_dev+minor, rw, bh)); | |
7399 | + err = mddev->pers->make_request(mddev, rw, bh); | |
7400 | } else { | |
7401 | make_request (MAJOR(bh->b_rdev), rw, bh); | |
7402 | - return 0; | |
7403 | + err = 0; | |
7404 | } | |
7405 | +out: | |
7406 | + return err; | |
7407 | } | |
7408 | ||
7409 | static void do_md_request (void) | |
7410 | { | |
7411 | - printk ("Got md request, not good..."); | |
7412 | - return; | |
7413 | + printk(KERN_ALERT "Got md request, not good..."); | |
7414 | + return; | |
7415 | +} | |
7416 | + | |
7417 | +int md_thread(void * arg) | |
7418 | +{ | |
7419 | + mdk_thread_t *thread = arg; | |
7420 | + | |
7421 | + md_lock_kernel(); | |
7422 | + exit_mm(current); | |
7423 | + exit_files(current); | |
7424 | + exit_fs(current); | |
7425 | + | |
7426 | + /* | |
7427 | + * Detach thread | |
7428 | + */ | |
7429 | + sys_setsid(); | |
7430 | + sprintf(current->comm, thread->name); | |
7431 | + md_init_signals(); | |
7432 | + md_flush_signals(); | |
7433 | + thread->tsk = current; | |
7434 | + | |
7435 | + /* | |
7436 | + * md_thread is a 'system-thread', its priority should be very | |
7437 | + * high. We avoid resource deadlocks individually in each | |
7438 | + * raid personality. (RAID5 does preallocation) We also use RR and | |
7439 | + * the very same RT priority as kswapd, thus we will never get | |
7440 | + * into a priority inversion deadlock. | |
7441 | + * | |
7442 | + * we definitely have to have equal or higher priority than | |
7443 | + * bdflush, otherwise bdflush will deadlock if there are too | |
7444 | + * many dirty RAID5 blocks. | |
7445 | + */ | |
7446 | + current->policy = SCHED_OTHER; | |
7447 | + current->priority = 40; | |
7448 | + | |
7449 | + up(thread->sem); | |
7450 | + | |
7451 | + for (;;) { | |
7452 | + cli(); | |
7453 | + if (!test_bit(THREAD_WAKEUP, &thread->flags)) { | |
7454 | + if (!thread->run) | |
7455 | + break; | |
7456 | + interruptible_sleep_on(&thread->wqueue); | |
7457 | + } | |
7458 | + sti(); | |
7459 | + clear_bit(THREAD_WAKEUP, &thread->flags); | |
7460 | + if (thread->run) { | |
7461 | + thread->run(thread->data); | |
7462 | + run_task_queue(&tq_disk); | |
7463 | + } | |
7464 | + if (md_signal_pending(current)) { | |
7465 | + printk("%8s(%d) flushing signals.\n", current->comm, | |
7466 | + current->pid); | |
7467 | + md_flush_signals(); | |
7468 | + } | |
7469 | + } | |
7470 | + sti(); | |
7471 | + up(thread->sem); | |
7472 | + return 0; | |
7473 | } | |
7474 | ||
7475 | -void md_wakeup_thread(struct md_thread *thread) | |
7476 | +void md_wakeup_thread(mdk_thread_t *thread) | |
7477 | { | |
7478 | set_bit(THREAD_WAKEUP, &thread->flags); | |
7479 | wake_up(&thread->wqueue); | |
7480 | } | |
7481 | ||
7482 | -struct md_thread *md_register_thread (void (*run) (void *), void *data) | |
7483 | +mdk_thread_t *md_register_thread (void (*run) (void *), | |
7484 | + void *data, const char *name) | |
7485 | { | |
7486 | - struct md_thread *thread = (struct md_thread *) | |
7487 | - kmalloc(sizeof(struct md_thread), GFP_KERNEL); | |
7488 | + mdk_thread_t *thread; | |
7489 | int ret; | |
7490 | struct semaphore sem = MUTEX_LOCKED; | |
7491 | ||
7492 | - if (!thread) return NULL; | |
7493 | + thread = (mdk_thread_t *) kmalloc | |
7494 | + (sizeof(mdk_thread_t), GFP_KERNEL); | |
7495 | + if (!thread) | |
7496 | + return NULL; | |
7497 | ||
7498 | - memset(thread, 0, sizeof(struct md_thread)); | |
7499 | + memset(thread, 0, sizeof(mdk_thread_t)); | |
7500 | init_waitqueue(&thread->wqueue); | |
7501 | ||
7502 | thread->sem = &sem; | |
7503 | thread->run = run; | |
7504 | thread->data = data; | |
7505 | + thread->name = name; | |
7506 | ret = kernel_thread(md_thread, thread, 0); | |
7507 | if (ret < 0) { | |
7508 | kfree(thread); | |
7509 | @@ -836,270 +3032,407 @@ | |
7510 | return thread; | |
7511 | } | |
7512 | ||
7513 | -void md_unregister_thread (struct md_thread *thread) | |
7514 | +void md_interrupt_thread (mdk_thread_t *thread) | |
7515 | +{ | |
7516 | + if (!thread->tsk) { | |
7517 | + MD_BUG(); | |
7518 | + return; | |
7519 | + } | |
7520 | + printk("interrupting MD-thread pid %d\n", thread->tsk->pid); | |
7521 | + send_sig(SIGKILL, thread->tsk, 1); | |
7522 | +} | |
7523 | + | |
7524 | +void md_unregister_thread (mdk_thread_t *thread) | |
7525 | { | |
7526 | struct semaphore sem = MUTEX_LOCKED; | |
7527 | ||
7528 | thread->sem = &sem; | |
7529 | thread->run = NULL; | |
7530 | - if (thread->tsk) | |
7531 | - printk("Killing md_thread %d %p %s\n", | |
7532 | - thread->tsk->pid, thread->tsk, thread->tsk->comm); | |
7533 | - else | |
7534 | - printk("Aiee. md_thread has 0 tsk\n"); | |
7535 | - send_sig(SIGKILL, thread->tsk, 1); | |
7536 | - printk("downing on %p\n", &sem); | |
7537 | + thread->name = NULL; | |
7538 | + if (!thread->tsk) { | |
7539 | + MD_BUG(); | |
7540 | + return; | |
7541 | + } | |
7542 | + md_interrupt_thread(thread); | |
7543 | down(&sem); | |
7544 | } | |
7545 | ||
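md_register_thread(), md_wakeup_thread(), md_interrupt_thread() and md_unregister_thread() together form the small worker-thread API the RAID personalities build on: registration spawns a detached kernel thread running md_thread(), wakeup sets THREAD_WAKEUP and wakes the thread's wait queue, and unregistration clears ->run and signals the thread so it exits. A hedged in-kernel usage sketch; the raid1d() worker function and the variable holding the thread are illustrative, not part of this patch:

    static void raid1d (void *data);        /* personality worker, illustrative */
    static mdk_thread_t *raid1_thread;

    static int raid1_start_thread (mddev_t *mddev)
    {
            raid1_thread = md_register_thread(raid1d, mddev, "raid1d");
            if (!raid1_thread)
                    return -ENOMEM;         /* kernel_thread() failed inside the helper */
            return 0;
    }

    /* when work is queued for the array:  md_wakeup_thread(raid1_thread);     */
    /* on shutdown:                        md_unregister_thread(raid1_thread); */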
7546 | -#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM)) | |
7547 | - | |
7548 | -int md_thread(void * arg) | |
7549 | +void md_recover_arrays (void) | |
7550 | { | |
7551 | - struct md_thread *thread = arg; | |
7552 | - | |
7553 | - lock_kernel(); | |
7554 | - exit_mm(current); | |
7555 | - exit_files(current); | |
7556 | - exit_fs(current); | |
7557 | - | |
7558 | - current->session = 1; | |
7559 | - current->pgrp = 1; | |
7560 | - sprintf(current->comm, "md_thread"); | |
7561 | - siginitsetinv(¤t->blocked, SHUTDOWN_SIGS); | |
7562 | - thread->tsk = current; | |
7563 | - up(thread->sem); | |
7564 | - | |
7565 | - for (;;) { | |
7566 | - cli(); | |
7567 | - if (!test_bit(THREAD_WAKEUP, &thread->flags)) { | |
7568 | - do { | |
7569 | - spin_lock(¤t->sigmask_lock); | |
7570 | - flush_signals(current); | |
7571 | - spin_unlock(¤t->sigmask_lock); | |
7572 | - interruptible_sleep_on(&thread->wqueue); | |
7573 | - cli(); | |
7574 | - if (test_bit(THREAD_WAKEUP, &thread->flags)) | |
7575 | - break; | |
7576 | - if (!thread->run) { | |
7577 | - sti(); | |
7578 | - up(thread->sem); | |
7579 | - return 0; | |
7580 | - } | |
7581 | - } while (signal_pending(current)); | |
7582 | - } | |
7583 | - sti(); | |
7584 | - clear_bit(THREAD_WAKEUP, &thread->flags); | |
7585 | - if (thread->run) { | |
7586 | - thread->run(thread->data); | |
7587 | - run_task_queue(&tq_disk); | |
7588 | - } | |
7589 | + if (!md_recovery_thread) { | |
7590 | + MD_BUG(); | |
7591 | + return; | |
7592 | } | |
7593 | + md_wakeup_thread(md_recovery_thread); | |
7594 | } | |
7595 | ||
7596 | -EXPORT_SYMBOL(md_size); | |
7597 | -EXPORT_SYMBOL(md_maxreadahead); | |
7598 | -EXPORT_SYMBOL(register_md_personality); | |
7599 | -EXPORT_SYMBOL(unregister_md_personality); | |
7600 | -EXPORT_SYMBOL(partition_name); | |
7601 | -EXPORT_SYMBOL(md_dev); | |
7602 | -EXPORT_SYMBOL(md_error); | |
7603 | -EXPORT_SYMBOL(md_register_thread); | |
7604 | -EXPORT_SYMBOL(md_unregister_thread); | |
7605 | -EXPORT_SYMBOL(md_update_sb); | |
7606 | -EXPORT_SYMBOL(md_map); | |
7607 | -EXPORT_SYMBOL(md_wakeup_thread); | |
7608 | -EXPORT_SYMBOL(md_do_sync); | |
7609 | ||
7610 | -#ifdef CONFIG_PROC_FS | |
7611 | -static struct proc_dir_entry proc_md = { | |
7612 | - PROC_MD, 6, "mdstat", | |
7613 | - S_IFREG | S_IRUGO, 1, 0, 0, | |
7614 | - 0, &proc_array_inode_operations, | |
7615 | -}; | |
7616 | +int md_error (kdev_t dev, kdev_t rdev) | |
7617 | +{ | |
7618 | + mddev_t *mddev = kdev_to_mddev(dev); | |
7619 | + mdk_rdev_t * rrdev; | |
7620 | + int rc; | |
7621 | + | |
7622 | + if (!mddev) { | |
7623 | + MD_BUG(); | |
7624 | + return 0; | |
7625 | + } | |
7626 | + rrdev = find_rdev(mddev, rdev); | |
7627 | + mark_rdev_faulty(rrdev); | |
7628 | + /* | |
7629 | + * if recovery was running, stop it now. | |
7630 | + */ | |
7631 | + if (mddev->pers->stop_resync) | |
7632 | + mddev->pers->stop_resync(mddev); | |
7633 | + if (mddev->recovery_running) | |
7634 | + md_interrupt_thread(md_recovery_thread); | |
7635 | + if (mddev->pers->error_handler) { | |
7636 | + rc = mddev->pers->error_handler(mddev, rdev); | |
7637 | + md_recover_arrays(); | |
7638 | + return rc; | |
7639 | + } | |
7640 | +#if 0 | |
7641 | + /* | |
7642 | + * Drop all buffers in the failed array. | |
7643 | + * _not_. This is called from IRQ handlers ... | |
7644 | + */ | |
7645 | + invalidate_buffers(rdev); | |
7646 | #endif | |
7647 | + return 0; | |
7648 | +} | |
7649 | ||
7650 | -static void md_geninit (struct gendisk *gdisk) | |
7651 | +static int status_unused (char * page) | |
7652 | { | |
7653 | - int i; | |
7654 | - | |
7655 | - for(i=0;i<MAX_MD_DEV;i++) | |
7656 | - { | |
7657 | - md_blocksizes[i] = 1024; | |
7658 | - md_maxreadahead[i] = MD_DEFAULT_DISK_READAHEAD; | |
7659 | - md_gendisk.part[i].start_sect=-1; /* avoid partition check */ | |
7660 | - md_gendisk.part[i].nr_sects=0; | |
7661 | - md_dev[i].pers=NULL; | |
7662 | - } | |
7663 | + int sz = 0, i = 0; | |
7664 | + mdk_rdev_t *rdev; | |
7665 | + struct md_list_head *tmp; | |
7666 | ||
7667 | - blksize_size[MD_MAJOR] = md_blocksizes; | |
7668 | - max_readahead[MD_MAJOR] = md_maxreadahead; | |
7669 | + sz += sprintf(page + sz, "unused devices: "); | |
7670 | ||
7671 | -#ifdef CONFIG_PROC_FS | |
7672 | - proc_register(&proc_root, &proc_md); | |
7673 | -#endif | |
7674 | + ITERATE_RDEV_ALL(rdev,tmp) { | |
7675 | + if (!rdev->same_set.next && !rdev->same_set.prev) { | |
7676 | + /* | |
7677 | + * The device is not yet used by any array. | |
7678 | + */ | |
7679 | + i++; | |
7680 | + sz += sprintf(page + sz, "%s ", | |
7681 | + partition_name(rdev->dev)); | |
7682 | + } | |
7683 | + } | |
7684 | + if (!i) | |
7685 | + sz += sprintf(page + sz, "<none>"); | |
7686 | + | |
7687 | + sz += sprintf(page + sz, "\n"); | |
7688 | + return sz; | |
7689 | } | |
7690 | ||
7691 | -int md_error (kdev_t mddev, kdev_t rdev) | |
7692 | + | |
7693 | +static int status_resync (char * page, mddev_t * mddev) | |
7694 | { | |
7695 | - unsigned int minor = MINOR (mddev); | |
7696 | - int rc; | |
7697 | + int sz = 0; | |
7698 | + unsigned int blocksize, max_blocks, resync, res, dt, tt, et; | |
7699 | ||
7700 | - if (MAJOR(mddev) != MD_MAJOR || minor > MAX_MD_DEV) | |
7701 | - panic ("md_error gets unknown device\n"); | |
7702 | - if (!md_dev [minor].pers) | |
7703 | - panic ("md_error gets an error for an unknown device\n"); | |
7704 | - if (md_dev [minor].pers->error_handler) { | |
7705 | - rc = md_dev [minor].pers->error_handler (md_dev+minor, rdev); | |
7706 | -#if SUPPORT_RECONSTRUCTION | |
7707 | - md_wakeup_thread(md_sync_thread); | |
7708 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
7709 | - return rc; | |
7710 | - } | |
7711 | - return 0; | |
7712 | + resync = mddev->curr_resync; | |
7713 | + blocksize = blksize_size[MD_MAJOR][mdidx(mddev)]; | |
7714 | + max_blocks = blk_size[MD_MAJOR][mdidx(mddev)] / (blocksize >> 10); | |
7715 | + | |
7716 | + /* | |
7717 | + * Should not happen. | |
7718 | + */ | |
7719 | + if (!max_blocks) { | |
7720 | + MD_BUG(); | |
7721 | + return 0; | |
7722 | + } | |
7723 | + res = resync*100/max_blocks; | |
7724 | + if (!mddev->recovery_running) | |
7725 | + /* | |
7726 | + * true resync | |
7727 | + */ | |
7728 | + sz += sprintf(page + sz, " resync=%u%%", res); | |
7729 | + else | |
7730 | + /* | |
7731 | + * recovery ... | |
7732 | + */ | |
7733 | + sz += sprintf(page + sz, " recovery=%u%%", res); | |
7734 | + | |
7735 | + /* | |
7736 | + * We do not want to overflow, so the order of operands and | |
7737 | + * the * 100 / 100 trick are important. We do a +1 to be | |
7738 | + * safe against division by zero. We only estimate anyway. | |
7739 | + * | |
7740 | + * dt: time until now | |
7741 | + * tt: total time | |
7742 | + * et: estimated finish time | |
7743 | + */ | |
7744 | + dt = ((jiffies - mddev->resync_start) / HZ); | |
7745 | + tt = (dt * (max_blocks / (resync/100+1)))/100; | |
7746 | + if (tt > dt) | |
7747 | + et = tt - dt; | |
7748 | + else | |
7749 | + /* | |
7750 | + * ignore rounding effects near finish time | |
7751 | + */ | |
7752 | + et = 0; | |
7753 | + | |
7754 | + sz += sprintf(page + sz, " finish=%u.%umin", et / 60, (et % 60)/6); | |
7755 | + | |
7756 | + return sz; | |
7757 | } | |
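The finish-time estimate in status_resync() stays within 32-bit integer arithmetic: resync is divided by 100 before being used as a divisor (the +1 protects against dividing by zero), so the intermediate product never grows large enough to overflow. With round, purely illustrative numbers:

    /* illustrative values: 100000 total blocks, 25000 resynced, 300 s elapsed */
    unsigned int max_blocks = 100000, resync = 25000, dt = 300;
    unsigned int tt = (dt * (max_blocks / (resync/100 + 1))) / 100; /* ~1194 s total          */
    unsigned int et = tt - dt;                                      /* ~894 s, about 14.9 min */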
7758 | ||
7759 | int get_md_status (char *page) | |
7760 | { | |
7761 | - int sz=0, i, j, size; | |
7762 | - | |
7763 | - sz+=sprintf( page+sz, "Personalities : "); | |
7764 | - for (i=0; i<MAX_PERSONALITY; i++) | |
7765 | - if (pers[i]) | |
7766 | - sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name); | |
7767 | - | |
7768 | - page[sz-1]='\n'; | |
7769 | - | |
7770 | - sz+=sprintf (page+sz, "read_ahead "); | |
7771 | - if (read_ahead[MD_MAJOR]==INT_MAX) | |
7772 | - sz+=sprintf (page+sz, "not set\n"); | |
7773 | - else | |
7774 | - sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]); | |
7775 | + int sz = 0, j, size; | |
7776 | + struct md_list_head *tmp, *tmp2; | |
7777 | + mdk_rdev_t *rdev; | |
7778 | + mddev_t *mddev; | |
7779 | + | |
7780 | + sz += sprintf(page + sz, "Personalities : "); | |
7781 | + for (j = 0; j < MAX_PERSONALITY; j++) | |
7782 | + if (pers[j]) | |
7783 | + sz += sprintf(page+sz, "[%s] ", pers[j]->name); | |
7784 | + | |
7785 | + sz += sprintf(page+sz, "\n"); | |
7786 | + | |
7787 | + | |
7788 | + sz += sprintf(page+sz, "read_ahead "); | |
7789 | + if (read_ahead[MD_MAJOR] == INT_MAX) | |
7790 | + sz += sprintf(page+sz, "not set\n"); | |
7791 | + else | |
7792 | + sz += sprintf(page+sz, "%d sectors\n", read_ahead[MD_MAJOR]); | |
7793 | ||
7794 | - for (i=0; i<MAX_MD_DEV; i++) | |
7795 | - { | |
7796 | - sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in"); | |
7797 | - | |
7798 | - if (md_dev[i].pers) | |
7799 | - sz+=sprintf (page+sz, " %s", md_dev[i].pers->name); | |
7800 | + ITERATE_MDDEV(mddev,tmp) { | |
7801 | + sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev), | |
7802 | + mddev->pers ? "" : "in"); | |
7803 | + if (mddev->pers) { | |
7804 | + if (mddev->ro) | |
7805 | + sz += sprintf(page + sz, " (read-only)"); | |
7806 | + sz += sprintf(page + sz, " %s", mddev->pers->name); | |
7807 | + } | |
7808 | ||
7809 | - size=0; | |
7810 | - for (j=0; j<md_dev[i].nb_dev; j++) | |
7811 | - { | |
7812 | - sz+=sprintf (page+sz, " %s", | |
7813 | - partition_name(md_dev[i].devices[j].dev)); | |
7814 | - size+=md_dev[i].devices[j].size; | |
7815 | - } | |
7816 | + size = 0; | |
7817 | + ITERATE_RDEV(mddev,rdev,tmp2) { | |
7818 | + sz += sprintf(page + sz, " %s[%d]", | |
7819 | + partition_name(rdev->dev), rdev->desc_nr); | |
7820 | + if (rdev->faulty) { | |
7821 | + sz += sprintf(page + sz, "(F)"); | |
7822 | + continue; | |
7823 | + } | |
7824 | + size += rdev->size; | |
7825 | + } | |
7826 | ||
7827 | - if (md_dev[i].nb_dev) { | |
7828 | - if (md_dev[i].pers) | |
7829 | - sz+=sprintf (page+sz, " %d blocks", md_size[i]); | |
7830 | - else | |
7831 | - sz+=sprintf (page+sz, " %d blocks", size); | |
7832 | - } | |
7833 | + if (mddev->nb_dev) { | |
7834 | + if (mddev->pers) | |
7835 | + sz += sprintf(page + sz, " %d blocks", | |
7836 | + md_size[mdidx(mddev)]); | |
7837 | + else | |
7838 | + sz += sprintf(page + sz, " %d blocks", size); | |
7839 | + } | |
7840 | ||
7841 | - if (!md_dev[i].pers) | |
7842 | - { | |
7843 | - sz+=sprintf (page+sz, "\n"); | |
7844 | - continue; | |
7845 | - } | |
7846 | + if (!mddev->pers) { | |
7847 | + sz += sprintf(page+sz, "\n"); | |
7848 | + continue; | |
7849 | + } | |
7850 | ||
7851 | - if (md_dev[i].pers->max_invalid_dev) | |
7852 | - sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i)); | |
7853 | + sz += mddev->pers->status (page+sz, mddev); | |
7854 | ||
7855 | - sz+=md_dev[i].pers->status (page+sz, i, md_dev+i); | |
7856 | - sz+=sprintf (page+sz, "\n"); | |
7857 | - } | |
7858 | + if (mddev->curr_resync) | |
7859 | + sz += status_resync (page+sz, mddev); | |
7860 | + else { | |
7861 | + if (md_atomic_read(&mddev->resync_sem.count) != 1) | |
7862 | + sz += sprintf(page + sz, " resync=DELAYED"); | |
7863 | + } | |
7864 | + sz += sprintf(page + sz, "\n"); | |
7865 | + } | |
7866 | + sz += status_unused (page + sz); | |
7867 | ||
7868 | - return (sz); | |
7869 | + return (sz); | |
7870 | } | |
7871 | ||
7872 | -int register_md_personality (int p_num, struct md_personality *p) | |
7873 | +int register_md_personality (int pnum, mdk_personality_t *p) | |
7874 | { | |
7875 | - int i=(p_num >> PERSONALITY_SHIFT); | |
7876 | - | |
7877 | - if (i >= MAX_PERSONALITY) | |
7878 | - return -EINVAL; | |
7879 | + if (pnum >= MAX_PERSONALITY) | |
7880 | + return -EINVAL; | |
7881 | ||
7882 | - if (pers[i]) | |
7883 | - return -EBUSY; | |
7884 | + if (pers[pnum]) | |
7885 | + return -EBUSY; | |
7886 | ||
7887 | - pers[i]=p; | |
7888 | - printk ("%s personality registered\n", p->name); | |
7889 | - return 0; | |
7890 | + pers[pnum] = p; | |
7891 | + printk(KERN_INFO "%s personality registered\n", p->name); | |
7892 | + return 0; | |
7893 | } | |
7894 | ||
7895 | -int unregister_md_personality (int p_num) | |
7896 | +int unregister_md_personality (int pnum) | |
7897 | { | |
7898 | - int i=(p_num >> PERSONALITY_SHIFT); | |
7899 | - | |
7900 | - if (i >= MAX_PERSONALITY) | |
7901 | - return -EINVAL; | |
7902 | + if (pnum >= MAX_PERSONALITY) | |
7903 | + return -EINVAL; | |
7904 | ||
7905 | - printk ("%s personality unregistered\n", pers[i]->name); | |
7906 | - pers[i]=NULL; | |
7907 | - return 0; | |
7908 | + printk(KERN_INFO "%s personality unregistered\n", pers[pnum]->name); | |
7909 | + pers[pnum] = NULL; | |
7910 | + return 0; | |
7911 | } | |
7912 | ||
7913 | -static md_descriptor_t *get_spare(struct md_dev *mddev) | |
7914 | +static mdp_disk_t *get_spare(mddev_t *mddev) | |
7915 | { | |
7916 | - int i; | |
7917 | - md_superblock_t *sb = mddev->sb; | |
7918 | - md_descriptor_t *descriptor; | |
7919 | - struct real_dev *realdev; | |
7920 | - | |
7921 | - for (i = 0; i < mddev->nb_dev; i++) { | |
7922 | - realdev = &mddev->devices[i]; | |
7923 | - if (!realdev->sb) | |
7924 | + mdp_super_t *sb = mddev->sb; | |
7925 | + mdp_disk_t *disk; | |
7926 | + mdk_rdev_t *rdev; | |
7927 | + struct md_list_head *tmp; | |
7928 | + | |
7929 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
7930 | + if (rdev->faulty) | |
7931 | + continue; | |
7932 | + if (!rdev->sb) { | |
7933 | + MD_BUG(); | |
7934 | continue; | |
7935 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
7936 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) | |
7937 | + } | |
7938 | + disk = &sb->disks[rdev->desc_nr]; | |
7939 | + if (disk_faulty(disk)) { | |
7940 | + MD_BUG(); | |
7941 | continue; | |
7942 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) | |
7943 | + } | |
7944 | + if (disk_active(disk)) | |
7945 | continue; | |
7946 | - return descriptor; | |
7947 | + return disk; | |
7948 | } | |
7949 | return NULL; | |
7950 | } | |
7951 | ||
7952 | +static int is_mddev_idle (mddev_t *mddev) | |
7953 | +{ | |
7954 | + mdk_rdev_t * rdev; | |
7955 | + struct md_list_head *tmp; | |
7956 | + int idle; | |
7957 | + unsigned long curr_events; | |
7958 | + | |
7959 | + idle = 1; | |
7960 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
7961 | + curr_events = io_events[MAJOR(rdev->dev)]; | |
7962 | + | |
7963 | + if (curr_events != rdev->last_events) { | |
7964 | +// printk("!I(%d)", curr_events-rdev->last_events); | |
7965 | + rdev->last_events = curr_events; | |
7966 | + idle = 0; | |
7967 | + } | |
7968 | + } | |
7969 | + return idle; | |
7970 | +} | |
7971 | + | |
7972 | /* | |
7973 | * parallel resyncing thread. | |
7974 | - * | |
7975 | - * FIXME: - make it abort with a dirty array on mdstop, now it just blocks | |
7976 | - * - fix read error handing | |
7977 | */ | |
7978 | ||
7979 | -int md_do_sync(struct md_dev *mddev) | |
7980 | +/* | |
7981 | + * Determine correct block size for this device. | |
7982 | + */ | |
7983 | +unsigned int device_bsize (kdev_t dev) | |
7984 | +{ | |
7985 | + unsigned int i, correct_size; | |
7986 | + | |
7987 | + correct_size = BLOCK_SIZE; | |
7988 | + if (blksize_size[MAJOR(dev)]) { | |
7989 | + i = blksize_size[MAJOR(dev)][MINOR(dev)]; | |
7990 | + if (i) | |
7991 | + correct_size = i; | |
7992 | + } | |
7993 | + | |
7994 | + return correct_size; | |
7995 | +} | |
7996 | + | |
7997 | +static struct wait_queue *resync_wait = (struct wait_queue *)NULL; | |
7998 | + | |
7999 | +#define RA_ORDER (1) | |
8000 | +#define RA_PAGE_SIZE (PAGE_SIZE*(1<<RA_ORDER)) | |
8001 | +#define MAX_NR_BLOCKS (RA_PAGE_SIZE/sizeof(struct buffer_head *)) | |
8002 | + | |
8003 | +int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) | |
8004 | { | |
8005 | - struct buffer_head *bh; | |
8006 | - int max_blocks, blocksize, curr_bsize, percent=1, j; | |
8007 | - kdev_t read_disk = MKDEV(MD_MAJOR, mddev - md_dev); | |
8008 | + mddev_t *mddev2; | |
8009 | + struct buffer_head **bh; | |
8010 | + unsigned int max_blocks, blocksize, curr_bsize, | |
8011 | + i, ii, j, k, chunk, window, nr_blocks, err, serialize; | |
8012 | + kdev_t read_disk = mddev_to_kdev(mddev); | |
8013 | int major = MAJOR(read_disk), minor = MINOR(read_disk); | |
8014 | unsigned long starttime; | |
8015 | + int max_read_errors = 2*MAX_NR_BLOCKS, | |
8016 | + max_write_errors = 2*MAX_NR_BLOCKS; | |
8017 | + struct md_list_head *tmp; | |
8018 | + | |
8019 | +retry_alloc: | |
8020 | + bh = (struct buffer_head **) md__get_free_pages(GFP_KERNEL, RA_ORDER); | |
8021 | + if (!bh) { | |
8022 | + printk(KERN_ERR | |
8023 | + "could not alloc bh array for reconstruction ... retrying!\n"); | |
8024 | + goto retry_alloc; | |
8025 | + } | |
8026 | + | |
8027 | + err = down_interruptible(&mddev->resync_sem); | |
8028 | + if (err) | |
8029 | + goto out_nolock; | |
8030 | + | |
8031 | +recheck: | |
8032 | + serialize = 0; | |
8033 | + ITERATE_MDDEV(mddev2,tmp) { | |
8034 | + if (mddev2 == mddev) | |
8035 | + continue; | |
8036 | + if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) { | |
8037 | + printk(KERN_INFO "md: serializing resync, md%d has overlapping physical units with md%d!\n", mdidx(mddev), mdidx(mddev2)); | |
8038 | + serialize = 1; | |
8039 | + break; | |
8040 | + } | |
8041 | + } | |
8042 | + if (serialize) { | |
8043 | + interruptible_sleep_on(&resync_wait); | |
8044 | + if (md_signal_pending(current)) { | |
8045 | + md_flush_signals(); | |
8046 | + err = -EINTR; | |
8047 | + goto out; | |
8048 | + } | |
8049 | + goto recheck; | |
8050 | + } | |
8051 | + | |
8052 | + mddev->curr_resync = 1; | |
8053 | ||
8054 | - blocksize = blksize_size[major][minor]; | |
8055 | + blocksize = device_bsize(read_disk); | |
8056 | max_blocks = blk_size[major][minor] / (blocksize >> 10); | |
8057 | ||
8058 | - printk("... resync log\n"); | |
8059 | - printk(" .... mddev->nb_dev: %d\n", mddev->nb_dev); | |
8060 | - printk(" .... raid array: %s\n", kdevname(read_disk)); | |
8061 | - printk(" .... max_blocks: %d blocksize: %d\n", max_blocks, blocksize); | |
8062 | - printk("md: syncing RAID array %s\n", kdevname(read_disk)); | |
8063 | + printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev)); | |
8064 | + printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed: %d KB/sec.\n", | |
8065 | + sysctl_speed_limit); | |
8066 | + printk(KERN_INFO "md: using maximum available idle IO bandwith for reconstruction.\n"); | |
8067 | + | |
8068 | + /* | |
8069 | + * Resync has low priority. | |
8070 | + */ | |
8071 | + current->priority = 1; | |
8072 | + | |
8073 | + is_mddev_idle(mddev); /* this also initializes IO event counters */ | |
8074 | + starttime = jiffies; | |
8075 | + mddev->resync_start = starttime; | |
8076 | ||
8077 | - mddev->busy++; | |
8078 | + /* | |
8079 | + * Tune reconstruction: | |
8080 | + */ | |
8081 | + window = md_maxreadahead[mdidx(mddev)]/1024; | |
8082 | + nr_blocks = window / (blocksize >> 10); | |
8083 | + if (!nr_blocks || (nr_blocks > MAX_NR_BLOCKS)) | |
8084 | + nr_blocks = MAX_NR_BLOCKS; | |
8085 | + printk(KERN_INFO "md: using %dk window.\n",window); | |
8086 | ||
8087 | - starttime=jiffies; | |
8088 | - for (j = 0; j < max_blocks; j++) { | |
8089 | + for (j = 0; j < max_blocks; j += nr_blocks) { | |
8090 | ||
8091 | + if (j) | |
8092 | + mddev->curr_resync = j; | |
8093 | /* | |
8094 | * B careful. When some1 mounts a non-'blocksize' filesystem | |
8095 | * then we get the blocksize changed right under us. Go deal | |
8096 | * with it transparently, recalculate 'blocksize', 'j' and | |
8097 | * 'max_blocks': | |
8098 | */ | |
8099 | - curr_bsize = blksize_size[major][minor]; | |
8100 | + curr_bsize = device_bsize(read_disk); | |
8101 | if (curr_bsize != blocksize) { | |
8102 | - diff_blocksize: | |
8103 | + printk(KERN_INFO "md%d: blocksize changed\n", | |
8104 | + mdidx(mddev)); | |
8105 | +retry_read: | |
8106 | if (curr_bsize > blocksize) | |
8107 | /* | |
8108 | * this is safe, rounds downwards. | |
8109 | @@ -1109,114 +3442,384 @@ | |
8110 | j *= blocksize/curr_bsize; | |
8111 | ||
8112 | blocksize = curr_bsize; | |
8113 | + nr_blocks = window / (blocksize >> 10); | |
8114 | + if (!nr_blocks || (nr_blocks > MAX_NR_BLOCKS)) | |
8115 | + nr_blocks = MAX_NR_BLOCKS; | |
8116 | max_blocks = blk_size[major][minor] / (blocksize >> 10); | |
8117 | - } | |
8118 | - if ((bh = breada (read_disk, j, blocksize, j * blocksize, | |
8119 | - max_blocks * blocksize)) != NULL) { | |
8120 | - mark_buffer_dirty(bh, 1); | |
8121 | - brelse(bh); | |
8122 | - } else { | |
8123 | + printk("nr_blocks changed to %d (blocksize %d, j %d, max_blocks %d)\n", | |
8124 | + nr_blocks, blocksize, j, max_blocks); | |
8125 | /* | |
8126 | - * FIXME: Ugly, but set_blocksize() isnt safe ... | |
8127 | + * We will retry the current block-group | |
8128 | */ | |
8129 | - curr_bsize = blksize_size[major][minor]; | |
8130 | - if (curr_bsize != blocksize) | |
8131 | - goto diff_blocksize; | |
8132 | + } | |
8133 | ||
8134 | - /* | |
8135 | - * It's a real read problem. FIXME, handle this | |
8136 | - * a better way. | |
8137 | - */ | |
8138 | - printk ( KERN_ALERT | |
8139 | - "read error, stopping reconstruction.\n"); | |
8140 | - mddev->busy--; | |
8141 | - return 1; | |
8142 | + /* | |
8143 | + * Cleanup routines expect this | |
8144 | + */ | |
8145 | + for (k = 0; k < nr_blocks; k++) | |
8146 | + bh[k] = NULL; | |
8147 | + | |
8148 | + chunk = nr_blocks; | |
8149 | + if (chunk > max_blocks-j) | |
8150 | + chunk = max_blocks-j; | |
8151 | + | |
8152 | + /* | |
8153 | + * request buffer heads ... | |
8154 | + */ | |
8155 | + for (i = 0; i < chunk; i++) { | |
8156 | + bh[i] = getblk (read_disk, j+i, blocksize); | |
8157 | + if (!bh[i]) | |
8158 | + goto read_error; | |
8159 | + if (!buffer_dirty(bh[i])) | |
8160 | + mark_buffer_lowprio(bh[i]); | |
8161 | } | |
8162 | ||
8163 | /* | |
8164 | - * Let's sleep some if we are faster than our speed limit: | |
8165 | + * read buffer heads ... | |
8166 | */ | |
8167 | - while (blocksize*j/(jiffies-starttime+1)*HZ/1024 > SPEED_LIMIT) | |
8168 | - { | |
8169 | - current->state = TASK_INTERRUPTIBLE; | |
8170 | - schedule_timeout(1); | |
8171 | + ll_rw_block (READ, chunk, bh); | |
8172 | + run_task_queue(&tq_disk); | |
8173 | + | |
8174 | + /* | |
8175 | + * verify that all of them are OK ... | |
8176 | + */ | |
8177 | + for (i = 0; i < chunk; i++) { | |
8178 | + ii = chunk-i-1; | |
8179 | + wait_on_buffer(bh[ii]); | |
8180 | + if (!buffer_uptodate(bh[ii])) | |
8181 | + goto read_error; | |
8182 | + } | |
8183 | + | |
8184 | +retry_write: | |
8185 | + for (i = 0; i < chunk; i++) | |
8186 | + mark_buffer_dirty_lowprio(bh[i]); | |
8187 | + | |
8188 | + ll_rw_block(WRITE, chunk, bh); | |
8189 | + run_task_queue(&tq_disk); | |
8190 | + | |
8191 | + for (i = 0; i < chunk; i++) { | |
8192 | + ii = chunk-i-1; | |
8193 | + wait_on_buffer(bh[ii]); | |
8194 | + | |
8195 | + if (spare && disk_faulty(spare)) { | |
8196 | + for (k = 0; k < chunk; k++) | |
8197 | + brelse(bh[k]); | |
8198 | + printk(" <SPARE FAILED!>\n "); | |
8199 | + err = -EIO; | |
8200 | + goto out; | |
8201 | + } | |
8202 | + | |
8203 | + if (!buffer_uptodate(bh[ii])) { | |
8204 | + curr_bsize = device_bsize(read_disk); | |
8205 | + if (curr_bsize != blocksize) { | |
8206 | + printk(KERN_INFO | |
8207 | + "md%d: blocksize changed during write\n", | |
8208 | + mdidx(mddev)); | |
8209 | + for (k = 0; k < chunk; k++) | |
8210 | + if (bh[k]) { | |
8211 | + if (buffer_lowprio(bh[k])) | |
8212 | + mark_buffer_clean(bh[k]); | |
8213 | + brelse(bh[k]); | |
8214 | + } | |
8215 | + goto retry_read; | |
8216 | + } | |
8217 | + printk(" BAD WRITE %8d>\n", j); | |
8218 | + /* | |
8219 | + * Ouch, write error, retry or bail out. | |
8220 | + */ | |
8221 | + if (max_write_errors) { | |
8222 | + max_write_errors--; | |
8223 | + printk ( KERN_WARNING "md%d: write error while reconstructing, at block %u(%d).\n", mdidx(mddev), j, blocksize); | |
8224 | + goto retry_write; | |
8225 | + } | |
8226 | + printk ( KERN_ALERT | |
8227 | + "too many write errors, stopping reconstruction.\n"); | |
8228 | + for (k = 0; k < chunk; k++) | |
8229 | + if (bh[k]) { | |
8230 | + if (buffer_lowprio(bh[k])) | |
8231 | + mark_buffer_clean(bh[k]); | |
8232 | + brelse(bh[k]); | |
8233 | + } | |
8234 | + err = -EIO; | |
8235 | + goto out; | |
8236 | + } | |
8237 | } | |
8238 | ||
8239 | /* | |
8240 | - * FIXME: put this status bar thing into /proc | |
8241 | + * This is the normal 'everything went OK' case | |
8242 | + * do 'free-behind' logic; we surely don't need | |
8243 | + * this buffer if it was the only user. | |
8244 | */ | |
8245 | - if (!(j%(max_blocks/100))) { | |
8246 | - if (!(percent%10)) | |
8247 | - printk (" %03d%% done.\n",percent); | |
8248 | + for (i = 0; i < chunk; i++) | |
8249 | + if (buffer_dirty(bh[i])) | |
8250 | + brelse(bh[i]); | |
8251 | else | |
8252 | - printk ("."); | |
8253 | - percent++; | |
8254 | + bforget(bh[i]); | |
8255 | + | |
8256 | + | |
8257 | + if (md_signal_pending(current)) { | |
8258 | + /* | |
8259 | + * got a signal, exit. | |
8260 | + */ | |
8261 | + mddev->curr_resync = 0; | |
8262 | + printk("md_do_sync() got signal ... exiting\n"); | |
8263 | + md_flush_signals(); | |
8264 | + err = -EINTR; | |
8265 | + goto out; | |
8266 | } | |
8267 | + | |
8268 | + /* | |
8269 | + * this loop exits only when either we are slower than | |
8270 | + * the 'hard' speed limit, or the system was IO-idle for | |
8271 | + * a jiffy. | |
8272 | + * the system might be non-idle CPU-wise, but we only care | |
8273 | + * about not overloading the IO subsystem. (things like an | |
8274 | + * e2fsck being done on the RAID array should execute fast) | |
8275 | + */ | |
8276 | +repeat: | |
8277 | + if (md_need_resched(current)) | |
8278 | + schedule(); | |
8279 | + | |
8280 | + if ((blocksize/1024)*j/((jiffies-starttime)/HZ + 1) + 1 | |
8281 | + > sysctl_speed_limit) { | |
8282 | + current->priority = 1; | |
8283 | + | |
8284 | + if (!is_mddev_idle(mddev)) { | |
8285 | + current->state = TASK_INTERRUPTIBLE; | |
8286 | + md_schedule_timeout(HZ/2); | |
8287 | + if (!md_signal_pending(current)) | |
8288 | + goto repeat; | |
8289 | + } | |
8290 | + } else | |
8291 | + current->priority = 40; | |
8292 | } | |
8293 | fsync_dev(read_disk); | |
8294 | - printk("md: %s: sync done.\n", kdevname(read_disk)); | |
8295 | - mddev->busy--; | |
8296 | - return 0; | |
8297 | + printk(KERN_INFO "md: md%d: sync done.\n",mdidx(mddev)); | |
8298 | + err = 0; | |
8299 | + /* | |
8300 | + * this also signals 'finished resyncing' to md_stop | |
8301 | + */ | |
8302 | +out: | |
8303 | + up(&mddev->resync_sem); | |
8304 | +out_nolock: | |
8305 | + free_pages((unsigned long)bh, RA_ORDER); | |
8306 | + mddev->curr_resync = 0; | |
8307 | + wake_up(&resync_wait); | |
8308 | + return err; | |
8309 | + | |
8310 | +read_error: | |
8311 | + /* | |
8312 | + * set_blocksize() might change the blocksize. This | |
8313 | + * should not happen often, but it happens when, e.g., | |
8314 | + * someone mounts a filesystem that has a non-1k | |
8315 | + * blocksize. set_blocksize() doesn't touch our | |
8316 | + * buffer, but to avoid aliasing problems we change | |
8317 | + * our internal blocksize too and retry the read. | |
8318 | + */ | |
8319 | + curr_bsize = device_bsize(read_disk); | |
8320 | + if (curr_bsize != blocksize) { | |
8321 | + printk(KERN_INFO "md%d: blocksize changed during read\n", | |
8322 | + mdidx(mddev)); | |
8323 | + for (k = 0; k < chunk; k++) | |
8324 | + if (bh[k]) { | |
8325 | + if (buffer_lowprio(bh[k])) | |
8326 | + mark_buffer_clean(bh[k]); | |
8327 | + brelse(bh[k]); | |
8328 | + } | |
8329 | + goto retry_read; | |
8330 | + } | |
8331 | + | |
8332 | + /* | |
8333 | + * It's a real read problem. We retry and bail out | |
8334 | + * only if it's excessive. | |
8335 | + */ | |
8336 | + if (max_read_errors) { | |
8337 | + max_read_errors--; | |
8338 | + printk ( KERN_WARNING "md%d: read error while reconstructing, at block %u(%d).\n", mdidx(mddev), j, blocksize); | |
8339 | + for (k = 0; k < chunk; k++) | |
8340 | + if (bh[k]) { | |
8341 | + if (buffer_lowprio(bh[k])) | |
8342 | + mark_buffer_clean(bh[k]); | |
8343 | + brelse(bh[k]); | |
8344 | + } | |
8345 | + goto retry_read; | |
8346 | + } | |
8347 | + printk ( KERN_ALERT "too many read errors, stopping reconstruction.\n"); | |
8348 | + for (k = 0; k < chunk; k++) | |
8349 | + if (bh[k]) { | |
8350 | + if (buffer_lowprio(bh[k])) | |
8351 | + mark_buffer_clean(bh[k]); | |
8352 | + brelse(bh[k]); | |
8353 | + } | |
8354 | + err = -EIO; | |
8355 | + goto out; | |
8356 | } | |
8357 | ||
8358 | +#undef MAX_NR_BLOCKS | |
8359 | + | |
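The throttling loop above estimates the resync rate as kilobytes written so far divided by elapsed seconds and, once that estimate exceeds sysctl_speed_limit, drops the thread's priority and sleeps for half a second whenever the array is not I/O-idle. A minimal stand-alone sketch of the same rate check, with an assumed HZ and purely illustrative values (none of these names are the kernel's):

    #include <stdio.h>

    #define HZ 100    /* assumed tick rate; the kernel takes it from <asm/param.h> */

    /* Returns non-zero when the estimated resync rate exceeds the KB/s limit,
     * mirroring the expression used by the resync loop above.                 */
    static int above_speed_limit(unsigned long blocksize, unsigned long blocks_done,
                                 unsigned long jiffies_now, unsigned long starttime,
                                 unsigned long speed_limit_kb)
    {
        unsigned long elapsed = (jiffies_now - starttime) / HZ + 1;  /* +1 avoids division by zero */
        unsigned long rate_kb = (blocksize / 1024) * blocks_done / elapsed + 1;

        return rate_kb > speed_limit_kb;
    }

    int main(void)
    {
        /* 1 KB blocks, 25000 blocks resynced in about 10 seconds: well above
         * a 100 KB/s limit, so the loop would back off.                       */
        printf("throttle: %d\n",
               above_speed_limit(1024, 25000, 1000UL + 10 * HZ, 1000UL, 100));
        return 0;
    }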
8360 | /* | |
8361 | - * This is a kernel thread which: syncs a spare disk with the active array | |
8362 | + * This is a kernel thread which syncs a spare disk with the active array | |
8363 | * | |
8364 | * the amount of foolproofing might seem to be a tad excessive, but an | |
8365 | * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs | |
8366 | * of my root partition with the first 0.5 gigs of my /home partition ... so | |
8367 | * i'm a bit nervous ;) | |
8368 | */ | |
8369 | -void mdsyncd (void *data) | |
8370 | +void md_do_recovery (void *data) | |
8371 | { | |
8372 | - int i; | |
8373 | - struct md_dev *mddev; | |
8374 | - md_superblock_t *sb; | |
8375 | - md_descriptor_t *spare; | |
8376 | + int err; | |
8377 | + mddev_t *mddev; | |
8378 | + mdp_super_t *sb; | |
8379 | + mdp_disk_t *spare; | |
8380 | unsigned long flags; | |
8381 | + struct md_list_head *tmp; | |
8382 | ||
8383 | - for (i = 0, mddev = md_dev; i < MAX_MD_DEV; i++, mddev++) { | |
8384 | - if ((sb = mddev->sb) == NULL) | |
8385 | + printk(KERN_INFO "md: recovery thread got woken up ...\n"); | |
8386 | +restart: | |
8387 | + ITERATE_MDDEV(mddev,tmp) { | |
8388 | + sb = mddev->sb; | |
8389 | + if (!sb) | |
8390 | + continue; | |
8391 | + if (mddev->recovery_running) | |
8392 | continue; | |
8393 | if (sb->active_disks == sb->raid_disks) | |
8394 | continue; | |
8395 | - if (!sb->spare_disks) | |
8396 | + if (!sb->spare_disks) { | |
8397 | + printk(KERN_ERR "md%d: no spare disk to reconstruct array! -- continuing in degraded mode\n", mdidx(mddev)); | |
8398 | continue; | |
8399 | + } | |
8400 | + /* | |
8401 | + * now here we get the spare and resync it. | |
8402 | + */ | |
8403 | if ((spare = get_spare(mddev)) == NULL) | |
8404 | continue; | |
8405 | - if (!mddev->pers->mark_spare) | |
8406 | + printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", mdidx(mddev), partition_name(MKDEV(spare->major,spare->minor))); | |
8407 | + if (!mddev->pers->diskop) | |
8408 | continue; | |
8409 | - if (mddev->pers->mark_spare(mddev, spare, SPARE_WRITE)) | |
8410 | + if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) | |
8411 | continue; | |
8412 | - if (md_do_sync(mddev) || (spare->state & (1 << MD_FAULTY_DEVICE))) { | |
8413 | - mddev->pers->mark_spare(mddev, spare, SPARE_INACTIVE); | |
8414 | + down(&mddev->recovery_sem); | |
8415 | + mddev->recovery_running = 1; | |
8416 | + err = md_do_sync(mddev, spare); | |
8417 | + if (err == -EIO) { | |
8418 | + printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n", mdidx(mddev), partition_name(MKDEV(spare->major,spare->minor))); | |
8419 | + if (!disk_faulty(spare)) { | |
8420 | + mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE); | |
8421 | + mark_disk_faulty(spare); | |
8422 | + mark_disk_nonsync(spare); | |
8423 | + mark_disk_inactive(spare); | |
8424 | + sb->spare_disks--; | |
8425 | + sb->working_disks--; | |
8426 | + sb->failed_disks++; | |
8427 | + } | |
8428 | + } else | |
8429 | + if (disk_faulty(spare)) | |
8430 | + mddev->pers->diskop(mddev, &spare, | |
8431 | + DISKOP_SPARE_INACTIVE); | |
8432 | + if (err == -EINTR) { | |
8433 | + /* | |
8434 | + * Recovery got interrupted ... | |
8435 | + * signal back that we have finished using the array. | |
8436 | + */ | |
8437 | + mddev->pers->diskop(mddev, &spare, | |
8438 | + DISKOP_SPARE_INACTIVE); | |
8439 | + up(&mddev->recovery_sem); | |
8440 | + mddev->recovery_running = 0; | |
8441 | continue; | |
8442 | + } else { | |
8443 | + mddev->recovery_running = 0; | |
8444 | + up(&mddev->recovery_sem); | |
8445 | } | |
8446 | save_flags(flags); | |
8447 | cli(); | |
8448 | - mddev->pers->mark_spare(mddev, spare, SPARE_ACTIVE); | |
8449 | - spare->state |= (1 << MD_SYNC_DEVICE); | |
8450 | - spare->state |= (1 << MD_ACTIVE_DEVICE); | |
8451 | - sb->spare_disks--; | |
8452 | - sb->active_disks++; | |
8453 | - mddev->sb_dirty = 1; | |
8454 | - md_update_sb(mddev - md_dev); | |
8455 | + if (!disk_faulty(spare)) { | |
8456 | + /* | |
8457 | + * the SPARE_ACTIVE diskop possibly changes the | |
8458 | + * pointer too | |
8459 | + */ | |
8460 | + mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); | |
8461 | + mark_disk_sync(spare); | |
8462 | + mark_disk_active(spare); | |
8463 | + sb->active_disks++; | |
8464 | + sb->spare_disks--; | |
8465 | + } | |
8466 | restore_flags(flags); | |
8467 | + mddev->sb_dirty = 1; | |
8468 | + md_update_sb(mddev); | |
8469 | + goto restart; | |
8470 | } | |
8471 | + printk(KERN_INFO "md: recovery thread finished ...\n"); | |
8472 | ||
8473 | } | |
8474 | ||
8475 | +int md_notify_reboot(struct notifier_block *this, | |
8476 | + unsigned long code, void *x) | |
8477 | +{ | |
8478 | + struct md_list_head *tmp; | |
8479 | + mddev_t *mddev; | |
8480 | + | |
8481 | + if ((code == MD_SYS_DOWN) || (code == MD_SYS_HALT) | |
8482 | + || (code == MD_SYS_POWER_OFF)) { | |
8483 | + | |
8484 | + printk(KERN_INFO "stopping all md devices.\n"); | |
8485 | + | |
8486 | + ITERATE_MDDEV(mddev,tmp) | |
8487 | + do_md_stop (mddev, 1); | |
8488 | + /* | |
8489 | + * certain more exotic SCSI devices are known to be | |
8490 | + * volatile wrt too early system reboots. While the | |
8491 | + * right place to handle this issue is the given | |
8492 | + * driver, we do want to have a safe RAID driver ... | |
8493 | + */ | |
8494 | + md_mdelay(1000*1); | |
8495 | + } | |
8496 | + return NOTIFY_DONE; | |
8497 | +} | |
8498 | + | |
8499 | +struct notifier_block md_notifier = { | |
8500 | + md_notify_reboot, | |
8501 | + NULL, | |
8502 | + 0 | |
8503 | +}; | |
8504 | + | |
8505 | +md__initfunc(void raid_setup(char *str, int *ints)) | |
8506 | +{ | |
8507 | + char tmpline[100]; | |
8508 | + int len, pos, nr, i; | |
8509 | + | |
8510 | + len = strlen(str) + 1; | |
8511 | + nr = 0; | |
8512 | + pos = 0; | |
8513 | + | |
8514 | + for (i = 0; i < len; i++) { | |
8515 | + char c = str[i]; | |
8516 | + | |
8517 | + if (c == ',' || !c) { | |
8518 | + tmpline[pos] = 0; | |
8519 | + if (!strcmp(tmpline,"noautodetect")) | |
8520 | + raid_setup_args.noautodetect = 1; | |
8521 | + nr++; | |
8522 | + pos = 0; | |
8523 | + continue; | |
8524 | + } | |
8525 | + tmpline[pos] = c; | |
8526 | + pos++; | |
8527 | + } | |
8528 | + raid_setup_args.set = 1; | |
8529 | + return; | |
8530 | +} | |
8531 | + | |
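raid_setup() above walks the comma-separated value of the raid= boot option and currently recognizes only the "noautodetect" keyword, so booting with raid=noautodetect suppresses RAID autodetection. A small userspace sketch of the same tokenizing; the struct below is hypothetical and mirrors only the fields the parser sets:

    #include <stdio.h>
    #include <string.h>

    struct raid_args {
        int set;
        int noautodetect;
    };

    /* Split the option value on commas and flag the "noautodetect" keyword,
     * the way raid_setup() above does (with an added bounds check).           */
    static void parse_raid_option(const char *str, struct raid_args *args)
    {
        char tok[100];
        int pos = 0;
        size_t i, len = strlen(str) + 1;   /* include the trailing NUL, as the kernel code does */

        for (i = 0; i < len; i++) {
            char c = str[i];

            if (c == ',' || !c) {          /* end of one comma-separated token */
                tok[pos] = 0;
                if (!strcmp(tok, "noautodetect"))
                    args->noautodetect = 1;
                pos = 0;
                continue;
            }
            if (pos < (int) sizeof(tok) - 1)
                tok[pos++] = c;
        }
        args->set = 1;
    }

    int main(void)
    {
        struct raid_args args = { 0, 0 };

        parse_raid_option("noautodetect", &args);
        printf("set=%d noautodetect=%d\n", args.set, args.noautodetect);
        return 0;
    }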
8532 | #ifdef CONFIG_MD_BOOT | |
8533 | struct { | |
8534 | int set; | |
8535 | int ints[100]; | |
8536 | char str[100]; | |
8537 | -} md_setup_args __initdata = { | |
8538 | +} md_setup_args md__initdata = { | |
8539 | 0,{0},{0} | |
8540 | }; | |
8541 | ||
8542 | /* called from init/main.c */ | |
8543 | -__initfunc(void md_setup(char *str,int *ints)) | |
8544 | +md__initfunc(void md_setup(char *str,int *ints)) | |
8545 | { | |
8546 | int i; | |
8547 | for(i=0;i<=ints[0];i++) { | |
8548 | @@ -1228,21 +3831,24 @@ | |
8549 | return; | |
8550 | } | |
8551 | ||
8552 | -__initfunc(void do_md_setup(char *str,int *ints)) | |
8553 | +md__initfunc(void do_md_setup(char *str,int *ints)) | |
8554 | { | |
8555 | - int minor, pers, factor, fault; | |
8556 | +#if 0 | |
8557 | + int minor, pers, chunk_size, fault; | |
8558 | kdev_t dev; | |
8559 | int i=1; | |
8560 | ||
8561 | + printk("i plan to phase this out --mingo\n"); | |
8562 | + | |
8563 | if(ints[0] < 4) { | |
8564 | - printk ("md: Too few Arguments (%d).\n", ints[0]); | |
8565 | + printk (KERN_WARNING "md: Too few Arguments (%d).\n", ints[0]); | |
8566 | return; | |
8567 | } | |
8568 | ||
8569 | minor=ints[i++]; | |
8570 | ||
8571 | - if (minor >= MAX_MD_DEV) { | |
8572 | - printk ("md: Minor device number too high.\n"); | |
8573 | + if ((unsigned int)minor >= MAX_MD_DEVS) { | |
8574 | + printk (KERN_WARNING "md: Minor device number too high.\n"); | |
8575 | return; | |
8576 | } | |
8577 | ||
8578 | @@ -1252,18 +3858,20 @@ | |
8579 | case -1: | |
8580 | #ifdef CONFIG_MD_LINEAR | |
8581 | pers = LINEAR; | |
8582 | - printk ("md: Setting up md%d as linear device.\n",minor); | |
8583 | + printk (KERN_INFO "md: Setting up md%d as linear device.\n", | |
8584 | + minor); | |
8585 | #else | |
8586 | - printk ("md: Linear mode not configured." | |
8587 | + printk (KERN_WARNING "md: Linear mode not configured." | |
8588 | "Recompile the kernel with linear mode enabled!\n"); | |
8589 | #endif | |
8590 | break; | |
8591 | case 0: | |
8592 | pers = STRIPED; | |
8593 | #ifdef CONFIG_MD_STRIPED | |
8594 | - printk ("md: Setting up md%d as a striped device.\n",minor); | |
8595 | + printk (KERN_INFO "md: Setting up md%d as a striped device.\n", | |
8596 | + minor); | |
8597 | #else | |
8598 | - printk ("md: Striped mode not configured." | |
8599 | + printk (KERN_WARNING "md: Striped mode not configured." | |
8600 | "Recompile the kernel with striped mode enabled!\n"); | |
8601 | #endif | |
8602 | break; | |
8603 | @@ -1278,79 +3886,145 @@ | |
8604 | break; | |
8605 | */ | |
8606 | default: | |
8607 | - printk ("md: Unknown or not supported raid level %d.\n", ints[--i]); | |
8608 | + printk (KERN_WARNING "md: Unknown or not supported raid level %d.\n", ints[--i]); | |
8609 | return; | |
8610 | } | |
8611 | ||
8612 | - if(pers) { | |
8613 | + if (pers) { | |
8614 | ||
8615 | - factor=ints[i++]; /* Chunksize */ | |
8616 | - fault =ints[i++]; /* Faultlevel */ | |
8617 | + chunk_size = ints[i++]; /* Chunksize */ | |
8618 | + fault = ints[i++]; /* Faultlevel */ | |
8619 | ||
8620 | - pers=pers | factor | (fault << FAULT_SHIFT); | |
8621 | + pers = pers | chunk_size | (fault << FAULT_SHIFT); | |
8622 | ||
8623 | - while( str && (dev = name_to_kdev_t(str))) { | |
8624 | - do_md_add (minor, dev); | |
8625 | - if((str = strchr (str, ',')) != NULL) | |
8626 | - str++; | |
8627 | - } | |
8628 | + while( str && (dev = name_to_kdev_t(str))) { | |
8629 | + do_md_add (minor, dev); | |
8630 | + if((str = strchr (str, ',')) != NULL) | |
8631 | + str++; | |
8632 | + } | |
8633 | ||
8634 | - do_md_run (minor, pers); | |
8635 | - printk ("md: Loading md%d.\n",minor); | |
8636 | + do_md_run (minor, pers); | |
8637 | + printk (KERN_INFO "md: Loading md%d.\n",minor); | |
8638 | } | |
8639 | - | |
8640 | +#endif | |
8641 | } | |
8642 | #endif | |
8643 | ||
8644 | +void hsm_init (void); | |
8645 | +void translucent_init (void); | |
8646 | void linear_init (void); | |
8647 | void raid0_init (void); | |
8648 | void raid1_init (void); | |
8649 | void raid5_init (void); | |
8650 | ||
8651 | -__initfunc(int md_init (void)) | |
8652 | +md__initfunc(int md_init (void)) | |
8653 | { | |
8654 | - printk ("md driver %d.%d.%d MAX_MD_DEV=%d, MAX_REAL=%d\n", | |
8655 | - MD_MAJOR_VERSION, MD_MINOR_VERSION, MD_PATCHLEVEL_VERSION, | |
8656 | - MAX_MD_DEV, MAX_REAL); | |
8657 | - | |
8658 | - if (register_blkdev (MD_MAJOR, "md", &md_fops)) | |
8659 | - { | |
8660 | - printk ("Unable to get major %d for md\n", MD_MAJOR); | |
8661 | - return (-1); | |
8662 | - } | |
8663 | - | |
8664 | - blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST; | |
8665 | - blk_dev[MD_MAJOR].current_request=NULL; | |
8666 | - read_ahead[MD_MAJOR]=INT_MAX; | |
8667 | - memset(md_dev, 0, MAX_MD_DEV * sizeof (struct md_dev)); | |
8668 | - md_gendisk.next=gendisk_head; | |
8669 | - | |
8670 | - gendisk_head=&md_gendisk; | |
8671 | - | |
8672 | -#if SUPPORT_RECONSTRUCTION | |
8673 | - if ((md_sync_thread = md_register_thread(mdsyncd, NULL)) == NULL) | |
8674 | - printk("md: bug: md_sync_thread == NULL\n"); | |
8675 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
8676 | + static char * name = "mdrecoveryd"; | |
8677 | + | |
8678 | + printk (KERN_INFO "md driver %d.%d.%d MAX_MD_DEVS=%d, MAX_REAL=%d\n", | |
8679 | + MD_MAJOR_VERSION, MD_MINOR_VERSION, | |
8680 | + MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL); | |
8681 | + | |
8682 | + if (register_blkdev (MD_MAJOR, "md", &md_fops)) | |
8683 | + { | |
8684 | + printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR); | |
8685 | + return (-1); | |
8686 | + } | |
8687 | + | |
8688 | + blk_dev[MD_MAJOR].request_fn = DEVICE_REQUEST; | |
8689 | + blk_dev[MD_MAJOR].current_request = NULL; | |
8690 | + read_ahead[MD_MAJOR] = INT_MAX; | |
8691 | + md_gendisk.next = gendisk_head; | |
8692 | + | |
8693 | + gendisk_head = &md_gendisk; | |
8694 | + | |
8695 | + md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); | |
8696 | + if (!md_recovery_thread) | |
8697 | + printk(KERN_ALERT "bug: couldn't allocate md_recovery_thread\n"); | |
8698 | ||
8699 | + md_register_reboot_notifier(&md_notifier); | |
8700 | + md_register_sysctl(); | |
8701 | + | |
8702 | +#ifdef CONFIG_MD_HSM | |
8703 | + hsm_init (); | |
8704 | +#endif | |
8705 | +#ifdef CONFIG_MD_TRANSLUCENT | |
8706 | + translucent_init (); | |
8707 | +#endif | |
8708 | #ifdef CONFIG_MD_LINEAR | |
8709 | - linear_init (); | |
8710 | + linear_init (); | |
8711 | #endif | |
8712 | #ifdef CONFIG_MD_STRIPED | |
8713 | - raid0_init (); | |
8714 | + raid0_init (); | |
8715 | #endif | |
8716 | #ifdef CONFIG_MD_MIRRORING | |
8717 | - raid1_init (); | |
8718 | + raid1_init (); | |
8719 | #endif | |
8720 | #ifdef CONFIG_MD_RAID5 | |
8721 | - raid5_init (); | |
8722 | + raid5_init (); | |
8723 | +#endif | |
8724 | +#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE) | |
8725 | + /* | |
8726 | + * pick a XOR routine, runtime. | |
8727 | + */ | |
8728 | + calibrate_xor_block(); | |
8729 | #endif | |
8730 | - return (0); | |
8731 | + | |
8732 | + return (0); | |
8733 | } | |
8734 | ||
8735 | #ifdef CONFIG_MD_BOOT | |
8736 | -__initfunc(void md_setup_drive(void)) | |
8737 | +md__initfunc(void md_setup_drive(void)) | |
8738 | { | |
8739 | if(md_setup_args.set) | |
8740 | do_md_setup(md_setup_args.str, md_setup_args.ints); | |
8741 | } | |
8742 | #endif | |
8743 | + | |
8744 | +MD_EXPORT_SYMBOL(md_size); | |
8745 | +MD_EXPORT_SYMBOL(register_md_personality); | |
8746 | +MD_EXPORT_SYMBOL(unregister_md_personality); | |
8747 | +MD_EXPORT_SYMBOL(partition_name); | |
8748 | +MD_EXPORT_SYMBOL(md_error); | |
8749 | +MD_EXPORT_SYMBOL(md_recover_arrays); | |
8750 | +MD_EXPORT_SYMBOL(md_register_thread); | |
8751 | +MD_EXPORT_SYMBOL(md_unregister_thread); | |
8752 | +MD_EXPORT_SYMBOL(md_update_sb); | |
8753 | +MD_EXPORT_SYMBOL(md_map); | |
8754 | +MD_EXPORT_SYMBOL(md_wakeup_thread); | |
8755 | +MD_EXPORT_SYMBOL(md_do_sync); | |
8756 | +MD_EXPORT_SYMBOL(md_print_devices); | |
8757 | +MD_EXPORT_SYMBOL(find_rdev_nr); | |
8758 | +MD_EXPORT_SYMBOL(md_check_ordering); | |
8759 | +MD_EXPORT_SYMBOL(md_interrupt_thread); | |
8760 | +MD_EXPORT_SYMBOL(mddev_map); | |
8761 | + | |
8762 | +#ifdef CONFIG_PROC_FS | |
8763 | +static struct proc_dir_entry proc_md = { | |
8764 | + PROC_MD, 6, "mdstat", | |
8765 | + S_IFREG | S_IRUGO, 1, 0, 0, | |
8766 | + 0, &proc_array_inode_operations, | |
8767 | +}; | |
8768 | +#endif | |
8769 | + | |
8770 | +static void md_geninit (struct gendisk *gdisk) | |
8771 | +{ | |
8772 | + int i; | |
8773 | + | |
8774 | + for(i = 0; i < MAX_MD_DEVS; i++) { | |
8775 | + md_blocksizes[i] = 1024; | |
8776 | + md_maxreadahead[i] = MD_READAHEAD; | |
8777 | + md_gendisk.part[i].start_sect = -1; /* avoid partition check */ | |
8778 | + md_gendisk.part[i].nr_sects = 0; | |
8779 | + } | |
8780 | + | |
8781 | + printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); | |
8782 | + | |
8783 | + blksize_size[MD_MAJOR] = md_blocksizes; | |
8784 | + md_set_global_readahead(md_maxreadahead); | |
8785 | + | |
8786 | +#ifdef CONFIG_PROC_FS | |
8787 | + proc_register(&proc_root, &proc_md); | |
8788 | +#endif | |
8789 | +} | |
8790 | + | |
8791 | --- linux/drivers/block/raid0.c.orig Mon Sep 4 19:39:16 2000 | |
8792 | +++ linux/drivers/block/raid0.c Fri Nov 23 11:18:18 2001 | |
8793 | @@ -1,4 +1,3 @@ | |
8794 | - | |
8795 | /* | |
8796 | raid0.c : Multiple Devices driver for Linux | |
8797 | Copyright (C) 1994-96 Marc ZYNGIER | |
8798 | @@ -18,146 +17,201 @@ | |
8799 | */ | |
8800 | ||
8801 | #include <linux/module.h> | |
8802 | -#include <linux/md.h> | |
8803 | -#include <linux/raid0.h> | |
8804 | -#include <linux/vmalloc.h> | |
8805 | +#include <linux/raid/raid0.h> | |
8806 | ||
8807 | #define MAJOR_NR MD_MAJOR | |
8808 | #define MD_DRIVER | |
8809 | #define MD_PERSONALITY | |
8810 | ||
8811 | -static int create_strip_zones (int minor, struct md_dev *mddev) | |
8812 | +static int create_strip_zones (mddev_t *mddev) | |
8813 | { | |
8814 | - int i, j, c=0; | |
8815 | - int current_offset=0; | |
8816 | - struct real_dev *smallest_by_zone; | |
8817 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
8818 | - | |
8819 | - data->nr_strip_zones=1; | |
8820 | - | |
8821 | - for (i=1; i<mddev->nb_dev; i++) | |
8822 | - { | |
8823 | - for (j=0; j<i; j++) | |
8824 | - if (mddev->devices[i].size==mddev->devices[j].size) | |
8825 | - { | |
8826 | - c=1; | |
8827 | - break; | |
8828 | - } | |
8829 | - | |
8830 | - if (!c) | |
8831 | - data->nr_strip_zones++; | |
8832 | - | |
8833 | - c=0; | |
8834 | - } | |
8835 | - | |
8836 | - if ((data->strip_zone=vmalloc(sizeof(struct strip_zone)*data->nr_strip_zones)) == NULL) | |
8837 | - return 1; | |
8838 | - | |
8839 | - data->smallest=NULL; | |
8840 | - | |
8841 | - for (i=0; i<data->nr_strip_zones; i++) | |
8842 | - { | |
8843 | - data->strip_zone[i].dev_offset=current_offset; | |
8844 | - smallest_by_zone=NULL; | |
8845 | - c=0; | |
8846 | - | |
8847 | - for (j=0; j<mddev->nb_dev; j++) | |
8848 | - if (mddev->devices[j].size>current_offset) | |
8849 | - { | |
8850 | - data->strip_zone[i].dev[c++]=mddev->devices+j; | |
8851 | - if (!smallest_by_zone || | |
8852 | - smallest_by_zone->size > mddev->devices[j].size) | |
8853 | - smallest_by_zone=mddev->devices+j; | |
8854 | - } | |
8855 | - | |
8856 | - data->strip_zone[i].nb_dev=c; | |
8857 | - data->strip_zone[i].size=(smallest_by_zone->size-current_offset)*c; | |
8858 | - | |
8859 | - if (!data->smallest || | |
8860 | - data->smallest->size > data->strip_zone[i].size) | |
8861 | - data->smallest=data->strip_zone+i; | |
8862 | - | |
8863 | - data->strip_zone[i].zone_offset=i ? (data->strip_zone[i-1].zone_offset+ | |
8864 | - data->strip_zone[i-1].size) : 0; | |
8865 | - current_offset=smallest_by_zone->size; | |
8866 | - } | |
8867 | - return 0; | |
8868 | + int i, c, j, j1, j2; | |
8869 | + int current_offset, curr_zone_offset; | |
8870 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
8871 | + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | |
8872 | + | |
8873 | + /* | |
8874 | + * The number of 'same size groups' | |
8875 | + */ | |
8876 | + conf->nr_strip_zones = 0; | |
8877 | + | |
8878 | + ITERATE_RDEV_ORDERED(mddev,rdev1,j1) { | |
8879 | + printk("raid0: looking at %s\n", partition_name(rdev1->dev)); | |
8880 | + c = 0; | |
8881 | + ITERATE_RDEV_ORDERED(mddev,rdev2,j2) { | |
8882 | + printk("raid0: comparing %s(%d) with %s(%d)\n", partition_name(rdev1->dev), rdev1->size, partition_name(rdev2->dev), rdev2->size); | |
8883 | + if (rdev2 == rdev1) { | |
8884 | + printk("raid0: END\n"); | |
8885 | + break; | |
8886 | + } | |
8887 | + if (rdev2->size == rdev1->size) | |
8888 | + { | |
8889 | + /* | |
8890 | + * Not unique, don't count it as a new | |
8891 | + * group | |
8892 | + */ | |
8893 | + printk("raid0: EQUAL\n"); | |
8894 | + c = 1; | |
8895 | + break; | |
8896 | + } | |
8897 | + printk("raid0: NOT EQUAL\n"); | |
8898 | + } | |
8899 | + if (!c) { | |
8900 | + printk("raid0: ==> UNIQUE\n"); | |
8901 | + conf->nr_strip_zones++; | |
8902 | + printk("raid0: %d zones\n", conf->nr_strip_zones); | |
8903 | + } | |
8904 | + } | |
8905 | + printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); | |
8906 | + | |
8907 | + conf->strip_zone = vmalloc(sizeof(struct strip_zone)* | |
8908 | + conf->nr_strip_zones); | |
8909 | + if (!conf->strip_zone) | |
8910 | + return 1; | |
8911 | + | |
8912 | + | |
8913 | + conf->smallest = NULL; | |
8914 | + current_offset = 0; | |
8915 | + curr_zone_offset = 0; | |
8916 | + | |
8917 | + for (i = 0; i < conf->nr_strip_zones; i++) | |
8918 | + { | |
8919 | + struct strip_zone *zone = conf->strip_zone + i; | |
8920 | + | |
8921 | + printk("zone %d\n", i); | |
8922 | + zone->dev_offset = current_offset; | |
8923 | + smallest = NULL; | |
8924 | + c = 0; | |
8925 | + | |
8926 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
8927 | + | |
8928 | + printk(" checking %s ...", partition_name(rdev->dev)); | |
8929 | + if (rdev->size > current_offset) | |
8930 | + { | |
8931 | + printk(" contained as device %d\n", c); | |
8932 | + zone->dev[c] = rdev; | |
8933 | + c++; | |
8934 | + if (!smallest || (rdev->size <smallest->size)) { | |
8935 | + smallest = rdev; | |
8936 | + printk(" (%d) is smallest!.\n", rdev->size); | |
8937 | + } | |
8938 | + } else | |
8939 | + printk(" nope.\n"); | |
8940 | + } | |
8941 | + | |
8942 | + zone->nb_dev = c; | |
8943 | + zone->size = (smallest->size - current_offset) * c; | |
8944 | + printk(" zone->nb_dev: %d, size: %d\n",zone->nb_dev,zone->size); | |
8945 | + | |
8946 | + if (!conf->smallest || (zone->size < conf->smallest->size)) | |
8947 | + conf->smallest = zone; | |
8948 | + | |
8949 | + zone->zone_offset = curr_zone_offset; | |
8950 | + curr_zone_offset += zone->size; | |
8951 | + | |
8952 | + current_offset = smallest->size; | |
8953 | + printk("current zone offset: %d\n", current_offset); | |
8954 | + } | |
8955 | + printk("done.\n"); | |
8956 | + return 0; | |
8957 | } | |
8958 | ||
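create_strip_zones() above groups the member devices into zones: each zone spans the capacity between the previous zone boundary and the smallest device still large enough to participate, and its size is that span multiplied by the number of participating members. A stand-alone sketch of the same geometry with made-up device sizes (in blocks):

    #include <stdio.h>

    #define NDEV 3    /* hypothetical member count */

    int main(void)
    {
        unsigned long size[NDEV] = { 1000, 4000, 4000 };   /* per-device sizes in blocks */
        unsigned long current_offset = 0, curr_zone_offset = 0;

        for (;;) {
            unsigned long smallest = 0;
            int c = 0, i;

            /* every device with capacity left beyond current_offset joins this zone */
            for (i = 0; i < NDEV; i++)
                if (size[i] > current_offset) {
                    c++;
                    if (!smallest || size[i] < smallest)
                        smallest = size[i];
                }
            if (!c)
                break;

            printf("zone: dev_offset=%lu nb_dev=%d size=%lu zone_offset=%lu\n",
                   current_offset, c, (smallest - current_offset) * c, curr_zone_offset);

            curr_zone_offset += (smallest - current_offset) * c;
            current_offset = smallest;
        }
        return 0;
    }

With the sizes above this prints two zones (3000 blocks striped over three devices, then 6000 blocks over the two larger ones), matching the count of distinct device sizes that nr_strip_zones records.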
8959 | -static int raid0_run (int minor, struct md_dev *mddev) | |
8960 | +static int raid0_run (mddev_t *mddev) | |
8961 | { | |
8962 | - int cur=0, i=0, size, zone0_size, nb_zone; | |
8963 | - struct raid0_data *data; | |
8964 | - | |
8965 | - MOD_INC_USE_COUNT; | |
8966 | + int cur=0, i=0, size, zone0_size, nb_zone; | |
8967 | + raid0_conf_t *conf; | |
8968 | ||
8969 | - if ((mddev->private=vmalloc (sizeof (struct raid0_data))) == NULL) return 1; | |
8970 | - data=(struct raid0_data *) mddev->private; | |
8971 | - | |
8972 | - if (create_strip_zones (minor, mddev)) | |
8973 | - { | |
8974 | - vfree(data); | |
8975 | - return 1; | |
8976 | - } | |
8977 | - | |
8978 | - nb_zone=data->nr_zones= | |
8979 | - md_size[minor]/data->smallest->size + | |
8980 | - (md_size[minor]%data->smallest->size ? 1 : 0); | |
8981 | - | |
8982 | - printk ("raid0 : Allocating %ld bytes for hash.\n",(long)sizeof(struct raid0_hash)*nb_zone); | |
8983 | - if ((data->hash_table=vmalloc (sizeof (struct raid0_hash)*nb_zone)) == NULL) | |
8984 | - { | |
8985 | - vfree(data->strip_zone); | |
8986 | - vfree(data); | |
8987 | - return 1; | |
8988 | - } | |
8989 | - size=data->strip_zone[cur].size; | |
8990 | - | |
8991 | - i=0; | |
8992 | - while (cur<data->nr_strip_zones) | |
8993 | - { | |
8994 | - data->hash_table[i].zone0=data->strip_zone+cur; | |
8995 | - | |
8996 | - if (size>=data->smallest->size)/* If we completely fill the slot */ | |
8997 | - { | |
8998 | - data->hash_table[i++].zone1=NULL; | |
8999 | - size-=data->smallest->size; | |
9000 | - | |
9001 | - if (!size) | |
9002 | - { | |
9003 | - if (++cur==data->nr_strip_zones) continue; | |
9004 | - size=data->strip_zone[cur].size; | |
9005 | - } | |
9006 | - | |
9007 | - continue; | |
9008 | - } | |
9009 | - | |
9010 | - if (++cur==data->nr_strip_zones) /* Last dev, set unit1 as NULL */ | |
9011 | - { | |
9012 | - data->hash_table[i].zone1=NULL; | |
9013 | - continue; | |
9014 | - } | |
9015 | - | |
9016 | - zone0_size=size; /* Here, we use a 2nd dev to fill the slot */ | |
9017 | - size=data->strip_zone[cur].size; | |
9018 | - data->hash_table[i++].zone1=data->strip_zone+cur; | |
9019 | - size-=(data->smallest->size - zone0_size); | |
9020 | - } | |
9021 | + MOD_INC_USE_COUNT; | |
9022 | ||
9023 | - return (0); | |
9024 | + conf = vmalloc(sizeof (raid0_conf_t)); | |
9025 | + if (!conf) | |
9026 | + goto out; | |
9027 | + mddev->private = (void *)conf; | |
9028 | + | |
9029 | + if (md_check_ordering(mddev)) { | |
9030 | + printk("raid0: disks are not ordered, aborting!\n"); | |
9031 | + goto out_free_conf; | |
9032 | + } | |
9033 | + | |
9034 | + if (create_strip_zones (mddev)) | |
9035 | + goto out_free_conf; | |
9036 | + | |
9037 | + printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]); | |
9038 | + printk("raid0 : conf->smallest->size is %d blocks.\n", conf->smallest->size); | |
9039 | + nb_zone = md_size[mdidx(mddev)]/conf->smallest->size + | |
9040 | + (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0); | |
9041 | + printk("raid0 : nb_zone is %d.\n", nb_zone); | |
9042 | + conf->nr_zones = nb_zone; | |
9043 | + | |
9044 | + printk("raid0 : Allocating %d bytes for hash.\n", | |
9045 | + sizeof(struct raid0_hash)*nb_zone); | |
9046 | + | |
9047 | + conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone); | |
9048 | + if (!conf->hash_table) | |
9049 | + goto out_free_zone_conf; | |
9050 | + size = conf->strip_zone[cur].size; | |
9051 | + | |
9052 | + i = 0; | |
9053 | + while (cur < conf->nr_strip_zones) { | |
9054 | + conf->hash_table[i].zone0 = conf->strip_zone + cur; | |
9055 | + | |
9056 | + /* | |
9057 | + * If we completely fill the slot | |
9058 | + */ | |
9059 | + if (size >= conf->smallest->size) { | |
9060 | + conf->hash_table[i++].zone1 = NULL; | |
9061 | + size -= conf->smallest->size; | |
9062 | + | |
9063 | + if (!size) { | |
9064 | + if (++cur == conf->nr_strip_zones) | |
9065 | + continue; | |
9066 | + size = conf->strip_zone[cur].size; | |
9067 | + } | |
9068 | + continue; | |
9069 | + } | |
9070 | + if (++cur == conf->nr_strip_zones) { | |
9071 | + /* | |
9072 | + * Last dev, set unit1 as NULL | |
9073 | + */ | |
9074 | + conf->hash_table[i].zone1=NULL; | |
9075 | + continue; | |
9076 | + } | |
9077 | + | |
9078 | + /* | |
9079 | + * Here we use a 2nd dev to fill the slot | |
9080 | + */ | |
9081 | + zone0_size = size; | |
9082 | + size = conf->strip_zone[cur].size; | |
9083 | + conf->hash_table[i++].zone1 = conf->strip_zone + cur; | |
9084 | + size -= (conf->smallest->size - zone0_size); | |
9085 | + } | |
9086 | + return 0; | |
9087 | + | |
9088 | +out_free_zone_conf: | |
9089 | + vfree(conf->strip_zone); | |
9090 | + conf->strip_zone = NULL; | |
9091 | + | |
9092 | +out_free_conf: | |
9093 | + vfree(conf); | |
9094 | + mddev->private = NULL; | |
9095 | +out: | |
9096 | + MOD_DEC_USE_COUNT; | |
9097 | + return 1; | |
9098 | } | |
9099 | ||
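The hash table built in raid0_run() above carves the logical device into slots of conf->smallest->size blocks; each slot records the zone it starts in and, if it straddles a zone boundary, the following zone, so raid0_map() can locate the right zone with one division. A sketch of the slot-filling loop with hypothetical zone sizes (-1 stands in for the NULL second zone):

    #include <stdio.h>

    #define NZONE 2    /* hypothetical zone geometry */

    int main(void)
    {
        unsigned long zone_size[NZONE] = { 3000, 6000 };   /* blocks per zone */
        unsigned long smallest = 3000;                     /* size of the smallest zone */
        unsigned long size;
        int cur = 0, slot = 0;

        size = zone_size[cur];
        while (cur < NZONE) {
            int zone0 = cur, zone1 = -1;

            if (size >= smallest) {                        /* slot filled by a single zone */
                size -= smallest;
                if (!size && ++cur < NZONE)
                    size = zone_size[cur];
            } else {                                       /* slot straddles two zones */
                unsigned long zone0_size = size;

                if (++cur < NZONE) {
                    zone1 = cur;
                    size = zone_size[cur] - (smallest - zone0_size);
                }
            }
            printf("slot %d: zone0=%d zone1=%d\n", slot++, zone0, zone1);
            if (cur >= NZONE)
                break;
        }
        return 0;
    }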
9100 | - | |
9101 | -static int raid0_stop (int minor, struct md_dev *mddev) | |
9102 | +static int raid0_stop (mddev_t *mddev) | |
9103 | { | |
9104 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
9105 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
9106 | ||
9107 | - vfree (data->hash_table); | |
9108 | - vfree (data->strip_zone); | |
9109 | - vfree (data); | |
9110 | + vfree (conf->hash_table); | |
9111 | + conf->hash_table = NULL; | |
9112 | + vfree (conf->strip_zone); | |
9113 | + conf->strip_zone = NULL; | |
9114 | + vfree (conf); | |
9115 | + mddev->private = NULL; | |
9116 | ||
9117 | - MOD_DEC_USE_COUNT; | |
9118 | - return 0; | |
9119 | + MOD_DEC_USE_COUNT; | |
9120 | + return 0; | |
9121 | } | |
9122 | ||
9123 | /* | |
9124 | @@ -167,135 +221,140 @@ | |
9125 | * Of course, those facts may not be valid anymore (and surely won't...) | |
9126 | * Hey guys, there's some work out there ;-) | |
9127 | */ | |
9128 | -static int raid0_map (struct md_dev *mddev, kdev_t *rdev, | |
9129 | +static int raid0_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
9130 | unsigned long *rsector, unsigned long size) | |
9131 | { | |
9132 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
9133 | - static struct raid0_hash *hash; | |
9134 | - struct strip_zone *zone; | |
9135 | - struct real_dev *tmp_dev; | |
9136 | - int blk_in_chunk, factor, chunk, chunk_size; | |
9137 | - long block, rblock; | |
9138 | - | |
9139 | - factor=FACTOR(mddev); | |
9140 | - chunk_size=(1UL << FACTOR_SHIFT(factor)); | |
9141 | - block=*rsector >> 1; | |
9142 | - hash=data->hash_table+(block/data->smallest->size); | |
9143 | - | |
9144 | - if (hash - data->hash_table > data->nr_zones) | |
9145 | - { | |
9146 | - printk(KERN_DEBUG "raid0_map: invalid block %li\n", block); | |
9147 | - return -1; | |
9148 | - } | |
9149 | - | |
9150 | - /* Sanity check */ | |
9151 | - if ((chunk_size*2)<(*rsector % (chunk_size*2))+size) | |
9152 | - { | |
9153 | - printk ("raid0_convert : can't convert block across chunks or bigger than %dk %ld %ld\n", chunk_size, *rsector, size); | |
9154 | - return (-1); | |
9155 | - } | |
9156 | - | |
9157 | - if (block >= (hash->zone0->size + | |
9158 | - hash->zone0->zone_offset)) | |
9159 | - { | |
9160 | - if (!hash->zone1) | |
9161 | - { | |
9162 | - printk ("raid0_convert : hash->zone1==NULL for block %ld\n", block); | |
9163 | - return (-1); | |
9164 | - } | |
9165 | - | |
9166 | - zone=hash->zone1; | |
9167 | - } | |
9168 | - else | |
9169 | - zone=hash->zone0; | |
9170 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
9171 | + struct raid0_hash *hash; | |
9172 | + struct strip_zone *zone; | |
9173 | + mdk_rdev_t *tmp_dev; | |
9174 | + int blk_in_chunk, chunksize_bits, chunk, chunk_size; | |
9175 | + long block, rblock; | |
9176 | + | |
9177 | + chunk_size = mddev->param.chunk_size >> 10; | |
9178 | + chunksize_bits = ffz(~chunk_size); | |
9179 | + block = *rsector >> 1; | |
9180 | + hash = conf->hash_table + block / conf->smallest->size; | |
9181 | + | |
9182 | + if (hash - conf->hash_table > conf->nr_zones) { | |
9183 | + printk(KERN_DEBUG "raid0_map: invalid block %lu\n", block); | |
9184 | + return -1; | |
9185 | + } | |
9186 | + | |
9187 | + /* Sanity check */ | |
9188 | + if ((chunk_size * 2) < (*rsector % (chunk_size * 2)) + size) | |
9189 | + goto bad_map; | |
9190 | + | |
9191 | + if (!hash) | |
9192 | + goto bad_hash; | |
9193 | + | |
9194 | + if (!hash->zone0) | |
9195 | + goto bad_zone0; | |
9196 | + | |
9197 | + if (block >= (hash->zone0->size + hash->zone0->zone_offset)) { | |
9198 | + if (!hash->zone1) | |
9199 | + goto bad_zone1; | |
9200 | + zone = hash->zone1; | |
9201 | + } else | |
9202 | + zone = hash->zone0; | |
9203 | ||
9204 | - blk_in_chunk=block & (chunk_size -1); | |
9205 | - chunk=(block - zone->zone_offset) / (zone->nb_dev<<FACTOR_SHIFT(factor)); | |
9206 | - tmp_dev=zone->dev[(block >> FACTOR_SHIFT(factor)) % zone->nb_dev]; | |
9207 | - rblock=(chunk << FACTOR_SHIFT(factor)) + blk_in_chunk + zone->dev_offset; | |
9208 | + blk_in_chunk = block & (chunk_size -1); | |
9209 | + chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits); | |
9210 | + tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev]; | |
9211 | + rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset; | |
9212 | ||
9213 | - *rdev=tmp_dev->dev; | |
9214 | - *rsector=rblock<<1; | |
9215 | + *rdev = tmp_dev->dev; | |
9216 | + *rsector = rblock << 1; | |
9217 | ||
9218 | - return (0); | |
9219 | + return 0; | |
9220 | + | |
9221 | +bad_map: | |
9222 | + printk ("raid0_map bug: can't convert block across chunks or bigger than %dk %ld %ld\n", chunk_size, *rsector, size); | |
9223 | + return -1; | |
9224 | +bad_hash: | |
9225 | + printk("raid0_map bug: hash==NULL for block %ld\n", block); | |
9226 | + return -1; | |
9227 | +bad_zone0: | |
9228 | + printk ("raid0_map bug: hash->zone0==NULL for block %ld\n", block); | |
9229 | + return -1; | |
9230 | +bad_zone1: | |
9231 | + printk ("raid0_map bug: hash->zone1==NULL for block %ld\n", block); | |
9232 | + return -1; | |
9233 | } | |
9234 | ||
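raid0_map() above turns a logical block into a (member device, device-relative block) pair: chunks rotate round-robin over the zone's members, and chunksize_bits comes from the chunk size via ffz(~chunk_size). A stand-alone sketch of that arithmetic; the geometry values are hypothetical:

    #include <stdio.h>

    int main(void)
    {
        unsigned long block = 12345;    /* logical 1 KB block, assumed to lie in this zone */
        unsigned long zone_offset = 0;  /* first logical block of the zone */
        unsigned long dev_offset = 0;   /* where the zone starts on each member device */
        int nb_dev = 3;                 /* members striped over in this zone */
        int chunk_size = 32;            /* chunk size in KB (a power of two) */
        int chunksize_bits = 0;
        unsigned long blk_in_chunk, chunk, rblock;
        int dev_index;

        /* equivalent of ffz(~chunk_size) for a power-of-two chunk size */
        while ((1 << chunksize_bits) < chunk_size)
            chunksize_bits++;

        blk_in_chunk = block & (chunk_size - 1);
        chunk     = (block - zone_offset) / (nb_dev << chunksize_bits);
        dev_index = (block >> chunksize_bits) % nb_dev;
        rblock    = (chunk << chunksize_bits) + blk_in_chunk + dev_offset;

        printf("block %lu -> member %d, device block %lu\n", block, dev_index, rblock);
        return 0;
    }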
9235 | ||
9236 | -static int raid0_status (char *page, int minor, struct md_dev *mddev) | |
9237 | +static int raid0_status (char *page, mddev_t *mddev) | |
9238 | { | |
9239 | - int sz=0; | |
9240 | + int sz = 0; | |
9241 | #undef MD_DEBUG | |
9242 | #ifdef MD_DEBUG | |
9243 | - int j, k; | |
9244 | - struct raid0_data *data=(struct raid0_data *) mddev->private; | |
9245 | + int j, k; | |
9246 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
9247 | ||
9248 | - sz+=sprintf (page+sz, " "); | |
9249 | - for (j=0; j<data->nr_zones; j++) | |
9250 | - { | |
9251 | - sz+=sprintf (page+sz, "[z%d", | |
9252 | - data->hash_table[j].zone0-data->strip_zone); | |
9253 | - if (data->hash_table[j].zone1) | |
9254 | - sz+=sprintf (page+sz, "/z%d] ", | |
9255 | - data->hash_table[j].zone1-data->strip_zone); | |
9256 | - else | |
9257 | - sz+=sprintf (page+sz, "] "); | |
9258 | - } | |
9259 | + sz += sprintf(page + sz, " "); | |
9260 | + for (j = 0; j < conf->nr_zones; j++) { | |
9261 | + sz += sprintf(page + sz, "[z%d", | |
9262 | + conf->hash_table[j].zone0 - conf->strip_zone); | |
9263 | + if (conf->hash_table[j].zone1) | |
9264 | + sz += sprintf(page+sz, "/z%d] ", | |
9265 | + conf->hash_table[j].zone1 - conf->strip_zone); | |
9266 | + else | |
9267 | + sz += sprintf(page+sz, "] "); | |
9268 | + } | |
9269 | ||
9270 | - sz+=sprintf (page+sz, "\n"); | |
9271 | + sz += sprintf(page + sz, "\n"); | |
9272 | ||
9273 | - for (j=0; j<data->nr_strip_zones; j++) | |
9274 | - { | |
9275 | - sz+=sprintf (page+sz, " z%d=[", j); | |
9276 | - for (k=0; k<data->strip_zone[j].nb_dev; k++) | |
9277 | - sz+=sprintf (page+sz, "%s/", | |
9278 | - partition_name(data->strip_zone[j].dev[k]->dev)); | |
9279 | - sz--; | |
9280 | - sz+=sprintf (page+sz, "] zo=%d do=%d s=%d\n", | |
9281 | - data->strip_zone[j].zone_offset, | |
9282 | - data->strip_zone[j].dev_offset, | |
9283 | - data->strip_zone[j].size); | |
9284 | - } | |
9285 | + for (j = 0; j < conf->nr_strip_zones; j++) { | |
9286 | + sz += sprintf(page + sz, " z%d=[", j); | |
9287 | + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | |
9288 | + sz += sprintf (page+sz, "%s/", partition_name( | |
9289 | + conf->strip_zone[j].dev[k]->dev)); | |
9290 | + sz--; | |
9291 | + sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n", | |
9292 | + conf->strip_zone[j].zone_offset, | |
9293 | + conf->strip_zone[j].dev_offset, | |
9294 | + conf->strip_zone[j].size); | |
9295 | + } | |
9296 | #endif | |
9297 | - sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(mddev))); | |
9298 | - return sz; | |
9299 | + sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024); | |
9300 | + return sz; | |
9301 | } | |
9302 | ||
9303 | - | |
9304 | -static struct md_personality raid0_personality= | |
9305 | +static mdk_personality_t raid0_personality= | |
9306 | { | |
9307 | - "raid0", | |
9308 | - raid0_map, | |
9309 | - NULL, /* no special make_request */ | |
9310 | - NULL, /* no special end_request */ | |
9311 | - raid0_run, | |
9312 | - raid0_stop, | |
9313 | - raid0_status, | |
9314 | - NULL, /* no ioctls */ | |
9315 | - 0, | |
9316 | - NULL, /* no error_handler */ | |
9317 | - NULL, /* hot_add_disk */ | |
9318 | - NULL, /* hot_remove_disk */ | |
9319 | - NULL /* mark_spare */ | |
9320 | + "raid0", | |
9321 | + raid0_map, | |
9322 | + NULL, /* no special make_request */ | |
9323 | + NULL, /* no special end_request */ | |
9324 | + raid0_run, | |
9325 | + raid0_stop, | |
9326 | + raid0_status, | |
9327 | + NULL, /* no ioctls */ | |
9328 | + 0, | |
9329 | + NULL, /* no error_handler */ | |
9330 | + NULL, /* no diskop */ | |
9331 | + NULL, /* no stop resync */ | |
9332 | + NULL /* no restart resync */ | |
9333 | }; | |
9334 | ||
9335 | - | |
9336 | #ifndef MODULE | |
9337 | ||
9338 | void raid0_init (void) | |
9339 | { | |
9340 | - register_md_personality (RAID0, &raid0_personality); | |
9341 | + register_md_personality (RAID0, &raid0_personality); | |
9342 | } | |
9343 | ||
9344 | #else | |
9345 | ||
9346 | int init_module (void) | |
9347 | { | |
9348 | - return (register_md_personality (RAID0, &raid0_personality)); | |
9349 | + return (register_md_personality (RAID0, &raid0_personality)); | |
9350 | } | |
9351 | ||
9352 | void cleanup_module (void) | |
9353 | { | |
9354 | - unregister_md_personality (RAID0); | |
9355 | + unregister_md_personality (RAID0); | |
9356 | } | |
9357 | ||
9358 | #endif | |
9359 | + | |
9360 | --- linux/drivers/block/raid1.c.orig Mon Dec 11 01:49:41 2000 | |
9361 | +++ linux/drivers/block/raid1.c Fri Nov 23 11:18:18 2001 | |
9362 | @@ -1,6 +1,6 @@ | |
9363 | -/************************************************************************ | |
9364 | +/* | |
9365 | * raid1.c : Multiple Devices driver for Linux | |
9366 | - * Copyright (C) 1996 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
9367 | + * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
9368 | * | |
9369 | * RAID-1 management functions. | |
9370 | * | |
9371 | @@ -15,50 +15,55 @@ | |
9372 | */ | |
9373 | ||
9374 | #include <linux/module.h> | |
9375 | -#include <linux/locks.h> | |
9376 | #include <linux/malloc.h> | |
9377 | -#include <linux/md.h> | |
9378 | -#include <linux/raid1.h> | |
9379 | -#include <asm/bitops.h> | |
9380 | +#include <linux/raid/raid1.h> | |
9381 | #include <asm/atomic.h> | |
9382 | ||
9383 | #define MAJOR_NR MD_MAJOR | |
9384 | #define MD_DRIVER | |
9385 | #define MD_PERSONALITY | |
9386 | ||
9387 | -/* | |
9388 | - * The following can be used to debug the driver | |
9389 | - */ | |
9390 | -/*#define RAID1_DEBUG*/ | |
9391 | -#ifdef RAID1_DEBUG | |
9392 | -#define PRINTK(x) do { printk x; } while (0); | |
9393 | -#else | |
9394 | -#define PRINTK(x) do { ; } while (0); | |
9395 | -#endif | |
9396 | +#define MAX_LINEAR_SECTORS 128 | |
9397 | ||
9398 | #define MAX(a,b) ((a) > (b) ? (a) : (b)) | |
9399 | #define MIN(a,b) ((a) < (b) ? (a) : (b)) | |
9400 | ||
9401 | -static struct md_personality raid1_personality; | |
9402 | -static struct md_thread *raid1_thread = NULL; | |
9403 | +static mdk_personality_t raid1_personality; | |
9404 | struct buffer_head *raid1_retry_list = NULL; | |
9405 | ||
9406 | -static int __raid1_map (struct md_dev *mddev, kdev_t *rdev, | |
9407 | +static void * raid1_kmalloc (int size) | |
9408 | +{ | |
9409 | + void * ptr; | |
9410 | + /* | |
9411 | + * we would rather be fault tolerant than nice here, but | |
9412 | + * there are a couple of places in the RAID code where we | |
9413 | + * simply cannot afford to fail an allocation because | |
9414 | + * there is no failure return path (e.g. make_request()) | |
9415 | + */ | |
9416 | + while (!(ptr = kmalloc (size, GFP_BUFFER))) { | |
9417 | + printk ("raid1: out of memory, retrying...\n"); | |
9418 | + current->state = TASK_UNINTERRUPTIBLE; | |
9419 | + schedule_timeout(HZ/10); | |
9420 | + } | |
9421 | + | |
9422 | + memset(ptr, 0, size); | |
9423 | + return ptr; | |
9424 | +} | |
9425 | + | |
9426 | +static int __raid1_map (mddev_t *mddev, kdev_t *rdev, | |
9427 | unsigned long *rsector, unsigned long size) | |
9428 | { | |
9429 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
9430 | - int i, n = raid_conf->raid_disks; | |
9431 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9432 | + int i, disks = MD_SB_DISKS; | |
9433 | ||
9434 | /* | |
9435 | * Later we do read balancing on the read side | |
9436 | * now we use the first available disk. | |
9437 | */ | |
9438 | ||
9439 | - PRINTK(("raid1_map().\n")); | |
9440 | - | |
9441 | - for (i=0; i<n; i++) { | |
9442 | - if (raid_conf->mirrors[i].operational) { | |
9443 | - *rdev = raid_conf->mirrors[i].dev; | |
9444 | + for (i = 0; i < disks; i++) { | |
9445 | + if (conf->mirrors[i].operational) { | |
9446 | + *rdev = conf->mirrors[i].dev; | |
9447 | return (0); | |
9448 | } | |
9449 | } | |
9450 | @@ -67,29 +72,29 @@ | |
9451 | return (-1); | |
9452 | } | |
9453 | ||
9454 | -static int raid1_map (struct md_dev *mddev, kdev_t *rdev, | |
9455 | +static int raid1_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
9456 | unsigned long *rsector, unsigned long size) | |
9457 | { | |
9458 | return 0; | |
9459 | } | |
9460 | ||
9461 | -void raid1_reschedule_retry (struct buffer_head *bh) | |
9462 | +static void raid1_reschedule_retry (struct buffer_head *bh) | |
9463 | { | |
9464 | struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
9465 | - | |
9466 | - PRINTK(("raid1_reschedule_retry().\n")); | |
9467 | + mddev_t *mddev = r1_bh->mddev; | |
9468 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9469 | ||
9470 | r1_bh->next_retry = raid1_retry_list; | |
9471 | raid1_retry_list = bh; | |
9472 | - md_wakeup_thread(raid1_thread); | |
9473 | + md_wakeup_thread(conf->thread); | |
9474 | } | |
9475 | ||
9476 | /* | |
9477 | - * raid1_end_buffer_io() is called when we have finished servicing a mirrored | |
9478 | + * raid1_end_bh_io() is called when we have finished servicing a mirrored | |
9479 | * operation and are ready to return a success/failure code to the buffer | |
9480 | * cache layer. | |
9481 | */ | |
9482 | -static inline void raid1_end_buffer_io(struct raid1_bh *r1_bh, int uptodate) | |
9483 | +static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate) | |
9484 | { | |
9485 | struct buffer_head *bh = r1_bh->master_bh; | |
9486 | ||
9487 | @@ -97,8 +102,6 @@ | |
9488 | kfree(r1_bh); | |
9489 | } | |
9490 | ||
9491 | -int raid1_one_error=0; | |
9492 | - | |
9493 | void raid1_end_request (struct buffer_head *bh, int uptodate) | |
9494 | { | |
9495 | struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
9496 | @@ -106,12 +109,7 @@ | |
9497 | ||
9498 | save_flags(flags); | |
9499 | cli(); | |
9500 | - PRINTK(("raid1_end_request().\n")); | |
9501 | ||
9502 | - if (raid1_one_error) { | |
9503 | - raid1_one_error=0; | |
9504 | - uptodate=0; | |
9505 | - } | |
9506 | /* | |
9507 | * this branch is our 'one mirror IO has finished' event handler: | |
9508 | */ | |
9509 | @@ -136,15 +134,11 @@ | |
9510 | */ | |
9511 | ||
9512 | if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) { | |
9513 | - | |
9514 | - PRINTK(("raid1_end_request(), read branch.\n")); | |
9515 | - | |
9516 | /* | |
9517 | * we have only one buffer_head on the read side | |
9518 | */ | |
9519 | if (uptodate) { | |
9520 | - PRINTK(("raid1_end_request(), read branch, uptodate.\n")); | |
9521 | - raid1_end_buffer_io(r1_bh, uptodate); | |
9522 | + raid1_end_bh_io(r1_bh, uptodate); | |
9523 | restore_flags(flags); | |
9524 | return; | |
9525 | } | |
9526 | @@ -152,71 +146,56 @@ | |
9527 | * oops, read error: | |
9528 | */ | |
9529 | printk(KERN_ERR "raid1: %s: rescheduling block %lu\n", | |
9530 | - kdevname(bh->b_dev), bh->b_blocknr); | |
9531 | - raid1_reschedule_retry (bh); | |
9532 | + partition_name(bh->b_dev), bh->b_blocknr); | |
9533 | + raid1_reschedule_retry(bh); | |
9534 | restore_flags(flags); | |
9535 | return; | |
9536 | } | |
9537 | ||
9538 | /* | |
9539 | - * WRITE or WRITEA. | |
9540 | - */ | |
9541 | - PRINTK(("raid1_end_request(), write branch.\n")); | |
9542 | - | |
9543 | - /* | |
9544 | + * WRITE: | |
9545 | + * | |
9546 | * Let's see if all mirrored write operations have finished | |
9547 | - * already [we have irqs off, so we can decrease]: | |
9548 | + * already. | |
9549 | */ | |
9550 | ||
9551 | - if (!--r1_bh->remaining) { | |
9552 | - struct md_dev *mddev = r1_bh->mddev; | |
9553 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
9554 | - int i, n = raid_conf->raid_disks; | |
9555 | - | |
9556 | - PRINTK(("raid1_end_request(), remaining == 0.\n")); | |
9557 | + if (atomic_dec_and_test(&r1_bh->remaining)) { | |
9558 | + int i, disks = MD_SB_DISKS; | |
9559 | ||
9560 | - for ( i=0; i<n; i++) | |
9561 | - if (r1_bh->mirror_bh[i]) kfree(r1_bh->mirror_bh[i]); | |
9562 | + for ( i = 0; i < disks; i++) | |
9563 | + if (r1_bh->mirror_bh[i]) | |
9564 | + kfree(r1_bh->mirror_bh[i]); | |
9565 | ||
9566 | - raid1_end_buffer_io(r1_bh, test_bit(BH_Uptodate, &r1_bh->state)); | |
9567 | + raid1_end_bh_io(r1_bh, test_bit(BH_Uptodate, &r1_bh->state)); | |
9568 | } | |
9569 | - else PRINTK(("raid1_end_request(), remaining == %u.\n", r1_bh->remaining)); | |
9570 | restore_flags(flags); | |
9571 | } | |
9572 | ||
9573 | -/* This routine checks if the undelying device is an md device and in that | |
9574 | - * case it maps the blocks before putting the request on the queue | |
9575 | +/* | |
9576 | + * This routine checks if the underlying device is an md device | |
9577 | + * and in that case it maps the blocks before putting the | |
9578 | + * request on the queue | |
9579 | */ | |
9580 | -static inline void | |
9581 | -map_and_make_request (int rw, struct buffer_head *bh) | |
9582 | +static void map_and_make_request (int rw, struct buffer_head *bh) | |
9583 | { | |
9584 | if (MAJOR (bh->b_rdev) == MD_MAJOR) | |
9585 | - md_map (MINOR (bh->b_rdev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9); | |
9586 | + md_map (bh->b_rdev, &bh->b_rdev, | |
9587 | + &bh->b_rsector, bh->b_size >> 9); | |
9588 | clear_bit(BH_Lock, &bh->b_state); | |
9589 | make_request (MAJOR (bh->b_rdev), rw, bh); | |
9590 | } | |
9591 | ||
9592 | -static int | |
9593 | -raid1_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) | |
9594 | +static int raid1_make_request (mddev_t *mddev, int rw, | |
9595 | + struct buffer_head * bh) | |
9596 | { | |
9597 | - | |
9598 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
9599 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9600 | struct buffer_head *mirror_bh[MD_SB_DISKS], *bh_req; | |
9601 | struct raid1_bh * r1_bh; | |
9602 | - int n = raid_conf->raid_disks, i, sum_bhs = 0, switch_disks = 0, sectors; | |
9603 | + int disks = MD_SB_DISKS; | |
9604 | + int i, sum_bhs = 0, switch_disks = 0, sectors, lowprio = 0; | |
9605 | struct mirror_info *mirror; | |
9606 | ||
9607 | - PRINTK(("raid1_make_request().\n")); | |
9608 | - | |
9609 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
9610 | - r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_BUFFER) | |
9611 | - ) ) | |
9612 | - { | |
9613 | - printk ("raid1_make_request(#1): out of memory\n"); | |
9614 | - current->policy |= SCHED_YIELD; | |
9615 | - schedule(); | |
9616 | - } | |
9617 | - memset (r1_bh, 0, sizeof (struct raid1_bh)); | |
9618 | + r1_bh = raid1_kmalloc (sizeof (struct raid1_bh)); | |
9619 | ||
9620 | /* | |
9621 | * make_request() can abort the operation when READA or WRITEA are being | |
9622 | @@ -227,43 +206,65 @@ | |
9623 | if (rw == READA) rw = READ; | |
9624 | if (rw == WRITEA) rw = WRITE; | |
9625 | ||
9626 | - if (rw == WRITE || rw == WRITEA) | |
9627 | - mark_buffer_clean(bh); /* Too early ? */ | |
9628 | + if (rw == WRITE) { | |
9629 | + /* | |
9630 | + * Too early ? | |
9631 | + */ | |
9632 | + mark_buffer_clean(bh); | |
9633 | + /* | |
9634 | + * not too early. we _first_ clean the bh, then we start | |
9635 | + * the IO, then when the IO has finished, we unlock the | |
9636 | + * bh and mark it uptodate. This way we do not miss the | |
9637 | + * case when the bh got dirty again during the IO. | |
9638 | + */ | |
9639 | + } | |
9640 | + | |
9641 | + /* | |
9642 | + * special flag for 'lowprio' reconstruction requests ... | |
9643 | + */ | |
9644 | + if (buffer_lowprio(bh)) | |
9645 | + lowprio = 1; | |
9646 | ||
9647 | /* | |
9648 | - * i think the read and write branch should be separated completely, since we want | |
9649 | - * to do read balancing on the read side for example. Comments? :) --mingo | |
9650 | + * i think the read and write branch should be separated completely, | |
9651 | + * since we want to do read balancing on the read side for example. | |
9652 | + * Comments? :) --mingo | |
9653 | */ | |
9654 | ||
9655 | r1_bh->master_bh=bh; | |
9656 | r1_bh->mddev=mddev; | |
9657 | r1_bh->cmd = rw; | |
9658 | ||
9659 | - if (rw==READ || rw==READA) { | |
9660 | - int last_used = raid_conf->last_used; | |
9661 | - PRINTK(("raid1_make_request(), read branch.\n")); | |
9662 | - mirror = raid_conf->mirrors + last_used; | |
9663 | + if (rw==READ) { | |
9664 | + int last_used = conf->last_used; | |
9665 | + | |
9666 | + /* | |
9667 | + * read balancing logic: | |
9668 | + */ | |
9669 | + mirror = conf->mirrors + last_used; | |
9670 | bh->b_rdev = mirror->dev; | |
9671 | sectors = bh->b_size >> 9; | |
9672 | - if (bh->b_blocknr * sectors == raid_conf->next_sect) { | |
9673 | - raid_conf->sect_count += sectors; | |
9674 | - if (raid_conf->sect_count >= mirror->sect_limit) | |
9675 | + | |
9676 | + if (bh->b_blocknr * sectors == conf->next_sect) { | |
9677 | + conf->sect_count += sectors; | |
9678 | + if (conf->sect_count >= mirror->sect_limit) | |
9679 | switch_disks = 1; | |
9680 | } else | |
9681 | switch_disks = 1; | |
9682 | - raid_conf->next_sect = (bh->b_blocknr + 1) * sectors; | |
9683 | - if (switch_disks) { | |
9684 | - PRINTK(("read-balancing: switching %d -> %d (%d sectors)\n", last_used, mirror->next, raid_conf->sect_count)); | |
9685 | - raid_conf->sect_count = 0; | |
9686 | - last_used = raid_conf->last_used = mirror->next; | |
9687 | + conf->next_sect = (bh->b_blocknr + 1) * sectors; | |
9688 | + /* | |
9689 | + * Do not switch disks if full resync is in progress ... | |
9690 | + */ | |
9691 | + if (switch_disks && !conf->resync_mirrors) { | |
9692 | + conf->sect_count = 0; | |
9693 | + last_used = conf->last_used = mirror->next; | |
9694 | /* | |
9695 | - * Do not switch to write-only disks ... resyncing | |
9696 | - * is in progress | |
9697 | + * Do not switch to write-only disks ... | |
9698 | + * reconstruction is in progress | |
9699 | */ | |
9700 | - while (raid_conf->mirrors[last_used].write_only) | |
9701 | - raid_conf->last_used = raid_conf->mirrors[last_used].next; | |
9702 | + while (conf->mirrors[last_used].write_only) | |
9703 | + conf->last_used = conf->mirrors[last_used].next; | |
9704 | } | |
9705 | - PRINTK (("raid1 read queue: %d %d\n", MAJOR (bh->b_rdev), MINOR (bh->b_rdev))); | |
9706 | bh_req = &r1_bh->bh_req; | |
9707 | memcpy(bh_req, bh, sizeof(*bh)); | |
9708 | bh_req->b_end_io = raid1_end_request; | |
9709 | @@ -273,13 +274,12 @@ | |
9710 | } | |
9711 | ||
9712 | /* | |
9713 | - * WRITE or WRITEA. | |
9714 | + * WRITE: | |
9715 | */ | |
9716 | - PRINTK(("raid1_make_request(n=%d), write branch.\n",n)); | |
9717 | ||
9718 | - for (i = 0; i < n; i++) { | |
9719 | + for (i = 0; i < disks; i++) { | |
9720 | ||
9721 | - if (!raid_conf->mirrors [i].operational) { | |
9722 | + if (!conf->mirrors[i].operational) { | |
9723 | /* | |
9724 | * the r1_bh->mirror_bh[i] pointer remains NULL | |
9725 | */ | |
9726 | @@ -287,89 +287,91 @@ | |
9727 | continue; | |
9728 | } | |
9729 | ||
9730 | + /* | |
9731 | + * special case for reconstruction ... | |
9732 | + */ | |
9733 | + if (lowprio && (i == conf->last_used)) { | |
9734 | + mirror_bh[i] = NULL; | |
9735 | + continue; | |
9736 | + } | |
9737 | + | |
9738 | + /* | |
9739 | + * We should use a private pool (size depending on NR_REQUEST), | |
9740 | + * to avoid writes filling up the memory with bhs | |
9741 | + * | |
9742 | + * Such pools are much faster than kmalloc anyway (so we waste | |
9743 | + * almost nothing by not using the master bh when writing and | |
9744 | + * win a lot of cleanness) but for now we are cool enough. --mingo | |
9745 | + * | |
9746 | + * It's safe to sleep here, buffer heads cannot be used in a shared | |
9747 | + * manner in the write branch. Look how we lock the buffer at the | |
9748 | + * beginning of this function to grok the difference ;) | |
9749 | + */ | |
9750 | + mirror_bh[i] = raid1_kmalloc(sizeof(struct buffer_head)); | |
9751 | + /* | |
9752 | + * prepare mirrored bh (fields ordered for max mem throughput): | |
9753 | + */ | |
9754 | + mirror_bh[i]->b_blocknr = bh->b_blocknr; | |
9755 | + mirror_bh[i]->b_dev = bh->b_dev; | |
9756 | + mirror_bh[i]->b_rdev = conf->mirrors[i].dev; | |
9757 | + mirror_bh[i]->b_rsector = bh->b_rsector; | |
9758 | + mirror_bh[i]->b_state = (1<<BH_Req) | (1<<BH_Dirty); | |
9759 | + if (lowprio) | |
9760 | + mirror_bh[i]->b_state |= (1<<BH_LowPrio); | |
9761 | + | |
9762 | + mirror_bh[i]->b_count = 1; | |
9763 | + mirror_bh[i]->b_size = bh->b_size; | |
9764 | + mirror_bh[i]->b_data = bh->b_data; | |
9765 | + mirror_bh[i]->b_list = BUF_LOCKED; | |
9766 | + mirror_bh[i]->b_end_io = raid1_end_request; | |
9767 | + mirror_bh[i]->b_dev_id = r1_bh; | |
9768 | + | |
9769 | + r1_bh->mirror_bh[i] = mirror_bh[i]; | |
9770 | + sum_bhs++; | |
9771 | + } | |
9772 | + | |
9773 | + md_atomic_set(&r1_bh->remaining, sum_bhs); | |
9774 | + | |
9775 | /* | |
9776 | - * We should use a private pool (size depending on NR_REQUEST), | |
9777 | - * to avoid writes filling up the memory with bhs | |
9778 | - * | |
9779 | - * Such pools are much faster than kmalloc anyways (so we waste almost | |
9780 | - * nothing by not using the master bh when writing and win alot of cleanness) | |
9781 | - * | |
9782 | - * but for now we are cool enough. --mingo | |
9783 | - * | |
9784 | - * It's safe to sleep here, buffer heads cannot be used in a shared | |
9785 | - * manner in the write branch. Look how we lock the buffer at the beginning | |
9786 | - * of this function to grok the difference ;) | |
9787 | - */ | |
9788 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
9789 | - mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_BUFFER) | |
9790 | - ) ) | |
9791 | - { | |
9792 | - printk ("raid1_make_request(#2): out of memory\n"); | |
9793 | - current->policy |= SCHED_YIELD; | |
9794 | - schedule(); | |
9795 | - } | |
9796 | - memset (mirror_bh[i], 0, sizeof (struct buffer_head)); | |
9797 | - | |
9798 | - /* | |
9799 | - * prepare mirrored bh (fields ordered for max mem throughput): | |
9800 | - */ | |
9801 | - mirror_bh [i]->b_blocknr = bh->b_blocknr; | |
9802 | - mirror_bh [i]->b_dev = bh->b_dev; | |
9803 | - mirror_bh [i]->b_rdev = raid_conf->mirrors [i].dev; | |
9804 | - mirror_bh [i]->b_rsector = bh->b_rsector; | |
9805 | - mirror_bh [i]->b_state = (1<<BH_Req) | (1<<BH_Dirty); | |
9806 | - mirror_bh [i]->b_count = 1; | |
9807 | - mirror_bh [i]->b_size = bh->b_size; | |
9808 | - mirror_bh [i]->b_data = bh->b_data; | |
9809 | - mirror_bh [i]->b_list = BUF_LOCKED; | |
9810 | - mirror_bh [i]->b_end_io = raid1_end_request; | |
9811 | - mirror_bh [i]->b_dev_id = r1_bh; | |
9812 | - | |
9813 | - r1_bh->mirror_bh[i] = mirror_bh[i]; | |
9814 | - sum_bhs++; | |
9815 | - } | |
9816 | - | |
9817 | - r1_bh->remaining = sum_bhs; | |
9818 | - | |
9819 | - PRINTK(("raid1_make_request(), write branch, sum_bhs=%d.\n",sum_bhs)); | |
9820 | - | |
9821 | - /* | |
9822 | - * We have to be a bit careful about the semaphore above, thats why we | |
9823 | - * start the requests separately. Since kmalloc() could fail, sleep and | |
9824 | - * make_request() can sleep too, this is the safer solution. Imagine, | |
9825 | - * end_request decreasing the semaphore before we could have set it up ... | |
9826 | - * We could play tricks with the semaphore (presetting it and correcting | |
9827 | - * at the end if sum_bhs is not 'n' but we have to do end_request by hand | |
9828 | - * if all requests finish until we had a chance to set up the semaphore | |
9829 | - * correctly ... lots of races). | |
9830 | - */ | |
9831 | - for (i = 0; i < n; i++) | |
9832 | - if (mirror_bh [i] != NULL) | |
9833 | - map_and_make_request (rw, mirror_bh [i]); | |
9834 | + * We have to be a bit careful about the semaphore above, that's | |
9835 | + * why we start the requests separately. Since kmalloc() can | |
9836 | + * fail and sleep, and make_request() can sleep too, this is the | |
9837 | + * safer solution. Imagine end_request decreasing the semaphore | |
9838 | + * before we could have set it up ... We could play tricks with | |
9839 | + * the semaphore (presetting it and correcting at the end if | |
9840 | + * sum_bhs is not 'n'), but then we would have to do end_request | |
9841 | + * by hand if all requests finish before we have had a chance to | |
9842 | + * set up the semaphore correctly ... lots of races. | |
9843 | + */ | |
9844 | + for (i = 0; i < disks; i++) | |
9845 | + if (mirror_bh[i]) | |
9846 | + map_and_make_request(rw, mirror_bh[i]); | |
9847 | ||
9848 | return (0); | |
9849 | } | |
9850 | ||
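The write branch above clones the master buffer head once per operational mirror and records the number of clones in r1_bh->remaining before any of them is submitted, so the original request can only be completed once the last mirror write has ended. A minimal user-space sketch of that fan-out-and-count pattern (struct and function names here are illustrative, not the driver's API):

    #include <stdio.h>

    #define NMIRRORS 3

    struct master_req { int pending; int done; };

    /* called once per finished mirror write; the master request is
     * completed only when the last clone has ended */
    static void end_mirror_write(struct master_req *m)
    {
        if (--m->pending == 0)
            m->done = 1;
    }

    int main(void)
    {
        struct master_req m = { 0, 0 };
        int i, clones = 0;

        /* "clone" one request per operational mirror */
        for (i = 0; i < NMIRRORS; i++)
            clones++;
        m.pending = clones;     /* set only after all clones exist */

        for (i = 0; i < NMIRRORS; i++)
            end_mirror_write(&m);

        printf("master done = %d\n", m.done);   /* prints 1 */
        return 0;
    }

Setting the counter before submitting any clone is exactly what the comment about the semaphore below is guarding against: an early completion must not race with the setup.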
9851 | -static int raid1_status (char *page, int minor, struct md_dev *mddev) | |
9852 | +static int raid1_status (char *page, mddev_t *mddev) | |
9853 | { | |
9854 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
9855 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9856 | int sz = 0, i; | |
9857 | ||
9858 | - sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks); | |
9859 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
9860 | - sz += sprintf (page+sz, "%s", raid_conf->mirrors [i].operational ? "U" : "_"); | |
9861 | + sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, | |
9862 | + conf->working_disks); | |
9863 | + for (i = 0; i < conf->raid_disks; i++) | |
9864 | + sz += sprintf (page+sz, "%s", | |
9865 | + conf->mirrors[i].operational ? "U" : "_"); | |
9866 | sz += sprintf (page+sz, "]"); | |
9867 | return sz; | |
9868 | } | |
9869 | ||
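raid1_status() builds the per-array fragment shown by md (e.g. in /proc/mdstat): the raid-disk and working-disk counts, followed by one character per mirror, 'U' for an operational mirror and '_' for a failed one. The same formatting as a stand-alone sketch (the array contents are made up for illustration):

    #include <stdio.h>

    int main(void)
    {
        int operational[3] = { 1, 1, 0 };   /* third mirror has failed */
        int raid_disks = 3, working = 2, i;
        char page[64];
        int sz = 0;

        sz += sprintf(page + sz, " [%d/%d] [", raid_disks, working);
        for (i = 0; i < raid_disks; i++)
            sz += sprintf(page + sz, "%s", operational[i] ? "U" : "_");
        sz += sprintf(page + sz, "]");

        puts(page);             /* prints " [3/2] [UU_]" */
        return 0;
    }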
9870 | -static void raid1_fix_links (struct raid1_data *raid_conf, int failed_index) | |
9871 | +static void unlink_disk (raid1_conf_t *conf, int target) | |
9872 | { | |
9873 | - int disks = raid_conf->raid_disks; | |
9874 | - int j; | |
9875 | + int disks = MD_SB_DISKS; | |
9876 | + int i; | |
9877 | ||
9878 | - for (j = 0; j < disks; j++) | |
9879 | - if (raid_conf->mirrors [j].next == failed_index) | |
9880 | - raid_conf->mirrors [j].next = raid_conf->mirrors [failed_index].next; | |
9881 | + for (i = 0; i < disks; i++) | |
9882 | + if (conf->mirrors[i].next == target) | |
9883 | + conf->mirrors[i].next = conf->mirrors[target].next; | |
9884 | } | |
9885 | ||
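unlink_disk() takes a failed mirror out of the read-balancing ring: every mirror whose ->next refers to the failed slot is re-pointed at that slot's own ->next, so reads simply skip it. A small user-space model of the same index ring (the struct is simplified, for illustration only):

    #include <stdio.h>

    #define NDISKS 4

    struct mirror { int next; };

    static void unlink_disk(struct mirror *m, int n, int target)
    {
        int i;
        for (i = 0; i < n; i++)
            if (m[i].next == target)
                m[i].next = m[target].next;
    }

    int main(void)
    {
        /* ring 0 -> 1 -> 2 -> 3 -> 0 */
        struct mirror m[NDISKS] = { {1}, {2}, {3}, {0} };
        int i;

        unlink_disk(m, NDISKS, 2);      /* disk 2 failed */
        for (i = 0; i < NDISKS; i++)
            printf("%d -> %d\n", i, m[i].next);  /* 1 now points at 3 */
        return 0;
    }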
9886 | #define LAST_DISK KERN_ALERT \ | |
9887 | @@ -388,48 +390,53 @@ | |
9888 | #define ALREADY_SYNCING KERN_INFO \ | |
9889 | "raid1: syncing already in progress.\n" | |
9890 | ||
9891 | -static int raid1_error (struct md_dev *mddev, kdev_t dev) | |
9892 | +static void mark_disk_bad (mddev_t *mddev, int failed) | |
9893 | { | |
9894 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
9895 | - struct mirror_info *mirror; | |
9896 | - md_superblock_t *sb = mddev->sb; | |
9897 | - int disks = raid_conf->raid_disks; | |
9898 | - int i; | |
9899 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9900 | + struct mirror_info *mirror = conf->mirrors+failed; | |
9901 | + mdp_super_t *sb = mddev->sb; | |
9902 | + | |
9903 | + mirror->operational = 0; | |
9904 | + unlink_disk(conf, failed); | |
9905 | + mark_disk_faulty(sb->disks+mirror->number); | |
9906 | + mark_disk_nonsync(sb->disks+mirror->number); | |
9907 | + mark_disk_inactive(sb->disks+mirror->number); | |
9908 | + sb->active_disks--; | |
9909 | + sb->working_disks--; | |
9910 | + sb->failed_disks++; | |
9911 | + mddev->sb_dirty = 1; | |
9912 | + md_wakeup_thread(conf->thread); | |
9913 | + conf->working_disks--; | |
9914 | + printk (DISK_FAILED, partition_name (mirror->dev), | |
9915 | + conf->working_disks); | |
9916 | +} | |
9917 | ||
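mark_disk_bad() gathers all the failure bookkeeping in one place: the mirror is taken out of service and dropped from the ring, its superblock descriptor is marked faulty, non-sync and inactive, the disk counters are adjusted, the superblock is flagged dirty, and the raid1d thread is woken so it gets written back. The counter side of that, as a tiny sketch on a simplified superblock struct (illustrative, not the on-disk layout):

    #include <stdio.h>

    struct sb { int active_disks, working_disks, failed_disks, dirty; };

    static void mark_disk_bad(struct sb *sb)
    {
        sb->active_disks--;
        sb->working_disks--;
        sb->failed_disks++;
        sb->dirty = 1;      /* the worker thread writes the sb back */
    }

    int main(void)
    {
        struct sb sb = { 2, 2, 0, 0 };

        mark_disk_bad(&sb);
        printf("active=%d working=%d failed=%d dirty=%d\n",
               sb.active_disks, sb.working_disks,
               sb.failed_disks, sb.dirty);
        return 0;
    }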
9918 | - PRINTK(("raid1_error called\n")); | |
9919 | +static int raid1_error (mddev_t *mddev, kdev_t dev) | |
9920 | +{ | |
9921 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
9922 | + struct mirror_info * mirrors = conf->mirrors; | |
9923 | + int disks = MD_SB_DISKS; | |
9924 | + int i; | |
9925 | ||
9926 | - if (raid_conf->working_disks == 1) { | |
9927 | + if (conf->working_disks == 1) { | |
9928 | /* | |
9929 | * Uh oh, we can do nothing if this is our last disk, but | |
9930 | * first check if this is a queued request for a device | |
9931 | * which has just failed. | |
9932 | */ | |
9933 | - for (i = 0, mirror = raid_conf->mirrors; i < disks; | |
9934 | - i++, mirror++) | |
9935 | - if (mirror->dev == dev && !mirror->operational) | |
9936 | + for (i = 0; i < disks; i++) { | |
9937 | + if (mirrors[i].dev==dev && !mirrors[i].operational) | |
9938 | return 0; | |
9939 | + } | |
9940 | printk (LAST_DISK); | |
9941 | } else { | |
9942 | - /* Mark disk as unusable */ | |
9943 | - for (i = 0, mirror = raid_conf->mirrors; i < disks; | |
9944 | - i++, mirror++) { | |
9945 | - if (mirror->dev == dev && mirror->operational){ | |
9946 | - mirror->operational = 0; | |
9947 | - raid1_fix_links (raid_conf, i); | |
9948 | - sb->disks[mirror->number].state |= | |
9949 | - (1 << MD_FAULTY_DEVICE); | |
9950 | - sb->disks[mirror->number].state &= | |
9951 | - ~(1 << MD_SYNC_DEVICE); | |
9952 | - sb->disks[mirror->number].state &= | |
9953 | - ~(1 << MD_ACTIVE_DEVICE); | |
9954 | - sb->active_disks--; | |
9955 | - sb->working_disks--; | |
9956 | - sb->failed_disks++; | |
9957 | - mddev->sb_dirty = 1; | |
9958 | - md_wakeup_thread(raid1_thread); | |
9959 | - raid_conf->working_disks--; | |
9960 | - printk (DISK_FAILED, kdevname (dev), | |
9961 | - raid_conf->working_disks); | |
9962 | + /* | |
9963 | + * Mark disk as unusable | |
9964 | + */ | |
9965 | + for (i = 0; i < disks; i++) { | |
9966 | + if (mirrors[i].dev==dev && mirrors[i].operational) { | |
9967 | + mark_disk_bad (mddev, i); | |
9968 | + break; | |
9969 | } | |
9970 | } | |
9971 | } | |
9972 | @@ -442,219 +449,396 @@ | |
9973 | #undef START_SYNCING | |
9974 | ||
9975 | /* | |
9976 | - * This is the personality-specific hot-addition routine | |
9977 | + * Insert the spare disk into the drive-ring | |
9978 | */ | |
9979 | +static void link_disk(raid1_conf_t *conf, struct mirror_info *mirror) | |
9980 | +{ | |
9981 | + int j, next; | |
9982 | + int disks = MD_SB_DISKS; | |
9983 | + struct mirror_info *p = conf->mirrors; | |
9984 | ||
9985 | -#define NO_SUPERBLOCK KERN_ERR \ | |
9986 | -"raid1: cannot hot-add disk to the array with no RAID superblock\n" | |
9987 | + for (j = 0; j < disks; j++, p++) | |
9988 | + if (p->operational && !p->write_only) { | |
9989 | + next = p->next; | |
9990 | + p->next = mirror->raid_disk; | |
9991 | + mirror->next = next; | |
9992 | + return; | |
9993 | + } | |
9994 | ||
9995 | -#define WRONG_LEVEL KERN_ERR \ | |
9996 | -"raid1: hot-add: level of disk is not RAID-1\n" | |
9997 | + printk("raid1: bug: no read-operational devices\n"); | |
9998 | +} | |
9999 | ||
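link_disk() is the inverse of unlink_disk(): the newly activated disk is spliced into the ring directly after the first mirror that is readable (operational and not write-only), so read balancing starts using it immediately. Continuing the simplified index-ring model from above:

    #include <stdio.h>

    #define NDISKS 4

    struct mirror { int next; int operational; int write_only; };

    static void link_disk(struct mirror *m, int n, int new_idx)
    {
        int j;
        for (j = 0; j < n; j++)
            if (m[j].operational && !m[j].write_only) {
                m[new_idx].next = m[j].next;  /* splice after disk j */
                m[j].next = new_idx;
                return;
            }
    }

    int main(void)
    {
        /* disks 0 and 1 form the ring 0 -> 1 -> 0; disk 2 is new */
        struct mirror m[NDISKS] = { {1,1,0}, {0,1,0}, {0,0,0}, {0,0,0} };

        m[2].operational = 1;
        link_disk(m, NDISKS, 2);
        printf("0 -> %d, 2 -> %d\n", m[0].next, m[2].next);  /* 0 -> 2, 2 -> 1 */
        return 0;
    }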
10000 | -#define HOT_ADD_SUCCEEDED KERN_INFO \ | |
10001 | -"raid1: device %s hot-added\n" | |
10002 | +static void print_raid1_conf (raid1_conf_t *conf) | |
10003 | +{ | |
10004 | + int i; | |
10005 | + struct mirror_info *tmp; | |
10006 | ||
10007 | -static int raid1_hot_add_disk (struct md_dev *mddev, kdev_t dev) | |
10008 | + printk("RAID1 conf printout:\n"); | |
10009 | + if (!conf) { | |
10010 | + printk("(conf==NULL)\n"); | |
10011 | + return; | |
10012 | + } | |
10013 | + printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks, | |
10014 | + conf->raid_disks, conf->nr_disks); | |
10015 | + | |
10016 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10017 | + tmp = conf->mirrors + i; | |
10018 | + printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", | |
10019 | + i, tmp->spare,tmp->operational, | |
10020 | + tmp->number,tmp->raid_disk,tmp->used_slot, | |
10021 | + partition_name(tmp->dev)); | |
10022 | + } | |
10023 | +} | |
10024 | + | |
10025 | +static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state) | |
10026 | { | |
10027 | + int err = 0; | |
10028 | + int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; | |
10029 | + raid1_conf_t *conf = mddev->private; | |
10030 | + struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; | |
10031 | unsigned long flags; | |
10032 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
10033 | - struct mirror_info *mirror; | |
10034 | - md_superblock_t *sb = mddev->sb; | |
10035 | - struct real_dev * realdev; | |
10036 | - int n; | |
10037 | + mdp_super_t *sb = mddev->sb; | |
10038 | + mdp_disk_t *failed_desc, *spare_desc, *added_desc; | |
10039 | + | |
10040 | + save_flags(flags); | |
10041 | + cli(); | |
10042 | ||
10043 | + print_raid1_conf(conf); | |
10044 | /* | |
10045 | - * The device has its superblock already read and it was found | |
10046 | - * to be consistent for generic RAID usage. Now we check whether | |
10047 | - * it's usable for RAID-1 hot addition. | |
10048 | + * find the disk ... | |
10049 | */ | |
10050 | + switch (state) { | |
10051 | ||
10052 | - n = mddev->nb_dev++; | |
10053 | - realdev = &mddev->devices[n]; | |
10054 | - if (!realdev->sb) { | |
10055 | - printk (NO_SUPERBLOCK); | |
10056 | - return -EINVAL; | |
10057 | - } | |
10058 | - if (realdev->sb->level != 1) { | |
10059 | - printk (WRONG_LEVEL); | |
10060 | - return -EINVAL; | |
10061 | + case DISKOP_SPARE_ACTIVE: | |
10062 | + | |
10063 | + /* | |
10064 | + * Find the failed disk within the RAID1 configuration ... | |
10065 | + * (this can only be in the first conf->working_disks part) | |
10066 | + */ | |
10067 | + for (i = 0; i < conf->raid_disks; i++) { | |
10068 | + tmp = conf->mirrors + i; | |
10069 | + if ((!tmp->operational && !tmp->spare) || | |
10070 | + !tmp->used_slot) { | |
10071 | + failed_disk = i; | |
10072 | + break; | |
10073 | + } | |
10074 | + } | |
10075 | + /* | |
10076 | + * When we activate a spare disk we _must_ have a disk in | |
10077 | + * the lower (active) part of the array to replace. | |
10078 | + */ | |
10079 | + if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) { | |
10080 | + MD_BUG(); | |
10081 | + err = 1; | |
10082 | + goto abort; | |
10083 | + } | |
10084 | + /* fall through */ | |
10085 | + | |
10086 | + case DISKOP_SPARE_WRITE: | |
10087 | + case DISKOP_SPARE_INACTIVE: | |
10088 | + | |
10089 | + /* | |
10090 | + * Find the spare disk ... (can only be in the 'high' | |
10091 | + * area of the array) | |
10092 | + */ | |
10093 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
10094 | + tmp = conf->mirrors + i; | |
10095 | + if (tmp->spare && tmp->number == (*d)->number) { | |
10096 | + spare_disk = i; | |
10097 | + break; | |
10098 | + } | |
10099 | + } | |
10100 | + if (spare_disk == -1) { | |
10101 | + MD_BUG(); | |
10102 | + err = 1; | |
10103 | + goto abort; | |
10104 | + } | |
10105 | + break; | |
10106 | + | |
10107 | + case DISKOP_HOT_REMOVE_DISK: | |
10108 | + | |
10109 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10110 | + tmp = conf->mirrors + i; | |
10111 | + if (tmp->used_slot && (tmp->number == (*d)->number)) { | |
10112 | + if (tmp->operational) { | |
10113 | + err = -EBUSY; | |
10114 | + goto abort; | |
10115 | + } | |
10116 | + removed_disk = i; | |
10117 | + break; | |
10118 | + } | |
10119 | + } | |
10120 | + if (removed_disk == -1) { | |
10121 | + MD_BUG(); | |
10122 | + err = 1; | |
10123 | + goto abort; | |
10124 | + } | |
10125 | + break; | |
10126 | + | |
10127 | + case DISKOP_HOT_ADD_DISK: | |
10128 | + | |
10129 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
10130 | + tmp = conf->mirrors + i; | |
10131 | + if (!tmp->used_slot) { | |
10132 | + added_disk = i; | |
10133 | + break; | |
10134 | + } | |
10135 | + } | |
10136 | + if (added_disk == -1) { | |
10137 | + MD_BUG(); | |
10138 | + err = 1; | |
10139 | + goto abort; | |
10140 | + } | |
10141 | + break; | |
10142 | } | |
10143 | - /* FIXME: are there other things left we could sanity-check? */ | |
10144 | ||
10145 | + switch (state) { | |
10146 | /* | |
10147 | - * We have to disable interrupts, as our RAID-1 state is used | |
10148 | - * from irq handlers as well. | |
10149 | + * Switch the spare disk to write-only mode: | |
10150 | */ | |
10151 | - save_flags(flags); | |
10152 | - cli(); | |
10153 | + case DISKOP_SPARE_WRITE: | |
10154 | + sdisk = conf->mirrors + spare_disk; | |
10155 | + sdisk->operational = 1; | |
10156 | + sdisk->write_only = 1; | |
10157 | + break; | |
10158 | + /* | |
10159 | + * Deactivate a spare disk: | |
10160 | + */ | |
10161 | + case DISKOP_SPARE_INACTIVE: | |
10162 | + sdisk = conf->mirrors + spare_disk; | |
10163 | + sdisk->operational = 0; | |
10164 | + sdisk->write_only = 0; | |
10165 | + break; | |
10166 | + /* | |
10167 | + * Activate (mark read-write) the (now sync) spare disk, | |
10168 | + * which means we swap its 'raid position' (->raid_disk) | |
10169 | + * with the failed disk. (only the first 'conf->nr_disks' | |
10170 | + * slots are used for 'real' disks and we must preserve this | |
10171 | + * property) | |
10172 | + */ | |
10173 | + case DISKOP_SPARE_ACTIVE: | |
10174 | ||
10175 | - raid_conf->raid_disks++; | |
10176 | - mirror = raid_conf->mirrors+n; | |
10177 | + sdisk = conf->mirrors + spare_disk; | |
10178 | + fdisk = conf->mirrors + failed_disk; | |
10179 | ||
10180 | - mirror->number=n; | |
10181 | - mirror->raid_disk=n; | |
10182 | - mirror->dev=dev; | |
10183 | - mirror->next=0; /* FIXME */ | |
10184 | - mirror->sect_limit=128; | |
10185 | - | |
10186 | - mirror->operational=0; | |
10187 | - mirror->spare=1; | |
10188 | - mirror->write_only=0; | |
10189 | - | |
10190 | - sb->disks[n].state |= (1 << MD_FAULTY_DEVICE); | |
10191 | - sb->disks[n].state &= ~(1 << MD_SYNC_DEVICE); | |
10192 | - sb->disks[n].state &= ~(1 << MD_ACTIVE_DEVICE); | |
10193 | - sb->nr_disks++; | |
10194 | - sb->spare_disks++; | |
10195 | + spare_desc = &sb->disks[sdisk->number]; | |
10196 | + failed_desc = &sb->disks[fdisk->number]; | |
10197 | ||
10198 | - restore_flags(flags); | |
10199 | + if (spare_desc != *d) { | |
10200 | + MD_BUG(); | |
10201 | + err = 1; | |
10202 | + goto abort; | |
10203 | + } | |
10204 | ||
10205 | - md_update_sb(MINOR(dev)); | |
10206 | + if (spare_desc->raid_disk != sdisk->raid_disk) { | |
10207 | + MD_BUG(); | |
10208 | + err = 1; | |
10209 | + goto abort; | |
10210 | + } | |
10211 | + | |
10212 | + if (sdisk->raid_disk != spare_disk) { | |
10213 | + MD_BUG(); | |
10214 | + err = 1; | |
10215 | + goto abort; | |
10216 | + } | |
10217 | ||
10218 | - printk (HOT_ADD_SUCCEEDED, kdevname(realdev->dev)); | |
10219 | + if (failed_desc->raid_disk != fdisk->raid_disk) { | |
10220 | + MD_BUG(); | |
10221 | + err = 1; | |
10222 | + goto abort; | |
10223 | + } | |
10224 | ||
10225 | - return 0; | |
10226 | -} | |
10227 | + if (fdisk->raid_disk != failed_disk) { | |
10228 | + MD_BUG(); | |
10229 | + err = 1; | |
10230 | + goto abort; | |
10231 | + } | |
10232 | ||
10233 | -#undef NO_SUPERBLOCK | |
10234 | -#undef WRONG_LEVEL | |
10235 | -#undef HOT_ADD_SUCCEEDED | |
10236 | + /* | |
10237 | + * do the switch finally | |
10238 | + */ | |
10239 | + xchg_values(*spare_desc, *failed_desc); | |
10240 | + xchg_values(*fdisk, *sdisk); | |
10241 | ||
10242 | -/* | |
10243 | - * Insert the spare disk into the drive-ring | |
10244 | - */ | |
10245 | -static void add_ring(struct raid1_data *raid_conf, struct mirror_info *mirror) | |
10246 | -{ | |
10247 | - int j, next; | |
10248 | - struct mirror_info *p = raid_conf->mirrors; | |
10249 | + /* | |
10250 | + * (careful, 'failed' and 'spare' are switched from now on) | |
10251 | + * | |
10252 | + * we want to preserve linear numbering and we want to | |
10253 | + * give the proper raid_disk number to the now activated | |
10254 | + * disk. (this means we switch these values back) | |
10255 | + */ | |
10256 | + | |
10257 | + xchg_values(spare_desc->raid_disk, failed_desc->raid_disk); | |
10258 | + xchg_values(sdisk->raid_disk, fdisk->raid_disk); | |
10259 | + xchg_values(spare_desc->number, failed_desc->number); | |
10260 | + xchg_values(sdisk->number, fdisk->number); | |
10261 | ||
10262 | - for (j = 0; j < raid_conf->raid_disks; j++, p++) | |
10263 | - if (p->operational && !p->write_only) { | |
10264 | - next = p->next; | |
10265 | - p->next = mirror->raid_disk; | |
10266 | - mirror->next = next; | |
10267 | - return; | |
10268 | - } | |
10269 | - printk("raid1: bug: no read-operational devices\n"); | |
10270 | -} | |
10271 | + *d = failed_desc; | |
10272 | ||
10273 | -static int raid1_mark_spare(struct md_dev *mddev, md_descriptor_t *spare, | |
10274 | - int state) | |
10275 | -{ | |
10276 | - int i = 0, failed_disk = -1; | |
10277 | - struct raid1_data *raid_conf = mddev->private; | |
10278 | - struct mirror_info *mirror = raid_conf->mirrors; | |
10279 | - md_descriptor_t *descriptor; | |
10280 | - unsigned long flags; | |
10281 | + if (sdisk->dev == MKDEV(0,0)) | |
10282 | + sdisk->used_slot = 0; | |
10283 | + /* | |
10284 | + * this really activates the spare. | |
10285 | + */ | |
10286 | + fdisk->spare = 0; | |
10287 | + fdisk->write_only = 0; | |
10288 | + link_disk(conf, fdisk); | |
10289 | ||
10290 | - for (i = 0; i < MD_SB_DISKS; i++, mirror++) { | |
10291 | - if (mirror->spare && mirror->number == spare->number) | |
10292 | - goto found; | |
10293 | - } | |
10294 | - return 1; | |
10295 | -found: | |
10296 | - for (i = 0, mirror = raid_conf->mirrors; i < raid_conf->raid_disks; | |
10297 | - i++, mirror++) | |
10298 | - if (!mirror->operational) | |
10299 | - failed_disk = i; | |
10300 | + /* | |
10301 | + * if we activate a spare, we definitely replace a | |
10302 | + * non-operational disk slot in the 'low' area of | |
10303 | + * the disk array. | |
10304 | + */ | |
10305 | ||
10306 | - save_flags(flags); | |
10307 | - cli(); | |
10308 | - switch (state) { | |
10309 | - case SPARE_WRITE: | |
10310 | - mirror->operational = 1; | |
10311 | - mirror->write_only = 1; | |
10312 | - raid_conf->raid_disks = MAX(raid_conf->raid_disks, | |
10313 | - mirror->raid_disk + 1); | |
10314 | - break; | |
10315 | - case SPARE_INACTIVE: | |
10316 | - mirror->operational = 0; | |
10317 | - mirror->write_only = 0; | |
10318 | - break; | |
10319 | - case SPARE_ACTIVE: | |
10320 | - mirror->spare = 0; | |
10321 | - mirror->write_only = 0; | |
10322 | - raid_conf->working_disks++; | |
10323 | - add_ring(raid_conf, mirror); | |
10324 | - | |
10325 | - if (failed_disk != -1) { | |
10326 | - descriptor = &mddev->sb->disks[raid_conf->mirrors[failed_disk].number]; | |
10327 | - i = spare->raid_disk; | |
10328 | - spare->raid_disk = descriptor->raid_disk; | |
10329 | - descriptor->raid_disk = i; | |
10330 | - } | |
10331 | - break; | |
10332 | - default: | |
10333 | - printk("raid1_mark_spare: bug: state == %d\n", state); | |
10334 | - restore_flags(flags); | |
10335 | - return 1; | |
10336 | + conf->working_disks++; | |
10337 | + | |
10338 | + break; | |
10339 | + | |
10340 | + case DISKOP_HOT_REMOVE_DISK: | |
10341 | + rdisk = conf->mirrors + removed_disk; | |
10342 | + | |
10343 | + if (rdisk->spare && (removed_disk < conf->raid_disks)) { | |
10344 | + MD_BUG(); | |
10345 | + err = 1; | |
10346 | + goto abort; | |
10347 | + } | |
10348 | + rdisk->dev = MKDEV(0,0); | |
10349 | + rdisk->used_slot = 0; | |
10350 | + conf->nr_disks--; | |
10351 | + break; | |
10352 | + | |
10353 | + case DISKOP_HOT_ADD_DISK: | |
10354 | + adisk = conf->mirrors + added_disk; | |
10355 | + added_desc = *d; | |
10356 | + | |
10357 | + if (added_disk != added_desc->number) { | |
10358 | + MD_BUG(); | |
10359 | + err = 1; | |
10360 | + goto abort; | |
10361 | + } | |
10362 | + | |
10363 | + adisk->number = added_desc->number; | |
10364 | + adisk->raid_disk = added_desc->raid_disk; | |
10365 | + adisk->dev = MKDEV(added_desc->major,added_desc->minor); | |
10366 | + | |
10367 | + adisk->operational = 0; | |
10368 | + adisk->write_only = 0; | |
10369 | + adisk->spare = 1; | |
10370 | + adisk->used_slot = 1; | |
10371 | + conf->nr_disks++; | |
10372 | + | |
10373 | + break; | |
10374 | + | |
10375 | + default: | |
10376 | + MD_BUG(); | |
10377 | + err = 1; | |
10378 | + goto abort; | |
10379 | } | |
10380 | +abort: | |
10381 | restore_flags(flags); | |
10382 | - return 0; | |
10383 | + print_raid1_conf(conf); | |
10384 | + return err; | |
10385 | } | |
10386 | ||
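The DISKOP_SPARE_ACTIVE branch is the delicate part of raid1_diskop(): the spare's and the failed disk's descriptors (and in-core mirror slots) are exchanged wholesale, and then the raid_disk and number fields are swapped back, so each slot keeps its identity while the rest of the descriptor, device and state, takes over the other's position. The idiom on its own, with a made-up struct and xchg_values assumed to be a plain three-assignment swap macro (uses GCC typeof):

    #include <stdio.h>

    /* assumed three-assignment swap, GCC typeof extension */
    #define xchg_values(a, b) \
        do { typeof(a) __t = (a); (a) = (b); (b) = __t; } while (0)

    struct desc { int number; int raid_disk; int faulty; };

    int main(void)
    {
        struct desc failed = { .number = 0, .raid_disk = 0, .faulty = 1 };
        struct desc spare  = { .number = 4, .raid_disk = 4, .faulty = 0 };

        /* swap the descriptors completely ... */
        xchg_values(spare, failed);
        /* ... then give each slot its identity back, so only the
         * payload (device/state, here: the faulty flag) moves */
        xchg_values(spare.raid_disk, failed.raid_disk);
        xchg_values(spare.number, failed.number);

        printf("spare:  number=%d raid_disk=%d faulty=%d\n",
               spare.number, spare.raid_disk, spare.faulty);
        printf("failed: number=%d raid_disk=%d faulty=%d\n",
               failed.number, failed.raid_disk, failed.faulty);
        return 0;
    }

The faulty flag stands in here for everything that moves with the swap; number and raid_disk are what the swap-back preserves.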
10387 | + | |
10388 | +#define IO_ERROR KERN_ALERT \ | |
10389 | +"raid1: %s: unrecoverable I/O read error for block %lu\n" | |
10390 | + | |
10391 | +#define REDIRECT_SECTOR KERN_ERR \ | |
10392 | +"raid1: %s: redirecting sector %lu to another mirror\n" | |
10393 | + | |
10394 | /* | |
10395 | * This is a kernel thread which: | |
10396 | * | |
10397 | * 1. Retries failed read operations on working mirrors. | |
10398 | * 2. Updates the raid superblock when problems are encountered. | |
10399 | */ | |
10400 | -void raid1d (void *data) | |
10401 | +static void raid1d (void *data) | |
10402 | { | |
10403 | struct buffer_head *bh; | |
10404 | kdev_t dev; | |
10405 | unsigned long flags; | |
10406 | - struct raid1_bh * r1_bh; | |
10407 | - struct md_dev *mddev; | |
10408 | + struct raid1_bh *r1_bh; | |
10409 | + mddev_t *mddev; | |
10410 | ||
10411 | - PRINTK(("raid1d() active\n")); | |
10412 | - save_flags(flags); | |
10413 | - cli(); | |
10414 | while (raid1_retry_list) { | |
10415 | + save_flags(flags); | |
10416 | + cli(); | |
10417 | bh = raid1_retry_list; | |
10418 | r1_bh = (struct raid1_bh *)(bh->b_dev_id); | |
10419 | raid1_retry_list = r1_bh->next_retry; | |
10420 | restore_flags(flags); | |
10421 | ||
10422 | - mddev = md_dev + MINOR(bh->b_dev); | |
10423 | + mddev = kdev_to_mddev(bh->b_dev); | |
10424 | if (mddev->sb_dirty) { | |
10425 | - printk("dirty sb detected, updating.\n"); | |
10426 | + printk(KERN_INFO "dirty sb detected, updating.\n"); | |
10427 | mddev->sb_dirty = 0; | |
10428 | - md_update_sb(MINOR(bh->b_dev)); | |
10429 | + md_update_sb(mddev); | |
10430 | } | |
10431 | dev = bh->b_rdev; | |
10432 | - __raid1_map (md_dev + MINOR(bh->b_dev), &bh->b_rdev, &bh->b_rsector, bh->b_size >> 9); | |
10433 | + __raid1_map (mddev, &bh->b_rdev, &bh->b_rsector, | |
10434 | + bh->b_size >> 9); | |
10435 | if (bh->b_rdev == dev) { | |
10436 | - printk (KERN_ALERT | |
10437 | - "raid1: %s: unrecoverable I/O read error for block %lu\n", | |
10438 | - kdevname(bh->b_dev), bh->b_blocknr); | |
10439 | - raid1_end_buffer_io(r1_bh, 0); | |
10440 | + printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr); | |
10441 | + raid1_end_bh_io(r1_bh, 0); | |
10442 | } else { | |
10443 | - printk (KERN_ERR "raid1: %s: redirecting sector %lu to another mirror\n", | |
10444 | - kdevname(bh->b_dev), bh->b_blocknr); | |
10445 | + printk (REDIRECT_SECTOR, | |
10446 | + partition_name(bh->b_dev), bh->b_blocknr); | |
10447 | map_and_make_request (r1_bh->cmd, bh); | |
10448 | } | |
10449 | - cli(); | |
10450 | } | |
10451 | - restore_flags(flags); | |
10452 | +} | |
10453 | +#undef IO_ERROR | |
10454 | +#undef REDIRECT_SECTOR | |
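raid1d() is the array's worker thread: it pops failed reads off raid1_retry_list with interrupts disabled (the completion handler also touches the list), flushes a dirty superblock, and asks __raid1_map() for another mirror; if the remap hands back the same device, no mirror is left and the read fails permanently. A user-space sketch of that pop-and-redirect loop, with a pthread mutex standing in for cli()/restore_flags() and a made-up remap rule:

    #include <stdio.h>
    #include <stdlib.h>
    #include <pthread.h>

    struct retry { int sector; int dev; struct retry *next; };

    static struct retry *retry_list;
    static pthread_mutex_t retry_lock = PTHREAD_MUTEX_INITIALIZER;

    /* pretend remap: device 0 can be redirected to 1, nothing else can */
    static int remap(int dev) { return dev == 0 ? 1 : dev; }

    static void raid1d(void)
    {
        for (;;) {
            struct retry *r;

            pthread_mutex_lock(&retry_lock);
            r = retry_list;
            if (r)
                retry_list = r->next;
            pthread_mutex_unlock(&retry_lock);
            if (!r)
                break;

            int newdev = remap(r->dev);
            if (newdev == r->dev)
                printf("sector %d: unrecoverable read error\n", r->sector);
            else
                printf("sector %d: redirected from dev %d to dev %d\n",
                       r->sector, r->dev, newdev);
            free(r);
        }
    }

    int main(void)
    {
        /* queue two failed reads */
        for (int i = 0; i < 2; i++) {
            struct retry *r = malloc(sizeof(*r));
            r->sector = 100 + i;
            r->dev = i;          /* the second one cannot be redirected */
            r->next = retry_list;
            retry_list = r;
        }
        raid1d();
        return 0;
    }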
10455 | + | |
10456 | +/* | |
10457 | + * Private kernel thread to reconstruct mirrors after an unclean | |
10458 | + * shutdown. | |
10459 | + */ | |
10460 | +static void raid1syncd (void *data) | |
10461 | +{ | |
10462 | + raid1_conf_t *conf = data; | |
10463 | + mddev_t *mddev = conf->mddev; | |
10464 | + | |
10465 | + if (!conf->resync_mirrors) | |
10466 | + return; | |
10467 | + if (conf->resync_mirrors == 2) | |
10468 | + return; | |
10469 | + down(&mddev->recovery_sem); | |
10470 | + if (md_do_sync(mddev, NULL)) { | |
10471 | + up(&mddev->recovery_sem); | |
10472 | + return; | |
10473 | + } | |
10474 | + /* | |
10475 | + * Only if everything went Ok. | |
10476 | + */ | |
10477 | + conf->resync_mirrors = 0; | |
10478 | + up(&mddev->recovery_sem); | |
10479 | } | |
10480 | ||
10481 | + | |
10482 | /* | |
10483 | * This will catch the scenario in which one of the mirrors was | |
10484 | * mounted as a normal device rather than as a part of a raid set. | |
10485 | + * | |
10486 | + * check_consistency is very personality-dependent, e.g. RAID5 cannot | |
10487 | + * do this check; it uses another method. | |
10488 | */ | |
10489 | -static int __check_consistency (struct md_dev *mddev, int row) | |
10490 | +static int __check_consistency (mddev_t *mddev, int row) | |
10491 | { | |
10492 | - struct raid1_data *raid_conf = mddev->private; | |
10493 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
10494 | + int disks = MD_SB_DISKS; | |
10495 | kdev_t dev; | |
10496 | struct buffer_head *bh = NULL; | |
10497 | int i, rc = 0; | |
10498 | char *buffer = NULL; | |
10499 | ||
10500 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
10501 | - if (!raid_conf->mirrors[i].operational) | |
10502 | + for (i = 0; i < disks; i++) { | |
10503 | + printk("(checking disk %d)\n",i); | |
10504 | + if (!conf->mirrors[i].operational) | |
10505 | continue; | |
10506 | - dev = raid_conf->mirrors[i].dev; | |
10507 | + printk("(really checking disk %d)\n",i); | |
10508 | + dev = conf->mirrors[i].dev; | |
10509 | set_blocksize(dev, 4096); | |
10510 | if ((bh = bread(dev, row / 4, 4096)) == NULL) | |
10511 | break; | |
10512 | @@ -683,167 +867,342 @@ | |
10513 | return rc; | |
10514 | } | |
10515 | ||
10516 | -static int check_consistency (struct md_dev *mddev) | |
10517 | +static int check_consistency (mddev_t *mddev) | |
10518 | { | |
10519 | - int size = mddev->sb->size; | |
10520 | - int row; | |
10521 | + if (__check_consistency(mddev, 0)) | |
10522 | +/* | |
10523 | + * we do not do this currently, as it's perfectly possible to | |
10524 | + * have an inconsistent array when it's freshly created. Only | |
10525 | + * newly written data has to be consistent. | |
10526 | + */ | |
10527 | + return 0; | |
10528 | ||
10529 | - for (row = 0; row < size; row += size / 8) | |
10530 | - if (__check_consistency(mddev, row)) | |
10531 | - return 1; | |
10532 | return 0; | |
10533 | } | |
10534 | ||
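__check_consistency() reads the same 4 KB block from every operational mirror and compares each copy against the first one it obtained; check_consistency() now probes only block 0 and, as the comment explains, deliberately ignores the result because a freshly created array may legitimately differ. The comparison logic as a stand-alone program that works on ordinary files rather than block devices (the file names are whatever you pass on the command line):

    #include <stdio.h>
    #include <string.h>

    #define BLOCKSIZE 4096

    /* Read one BLOCKSIZE block at 'row' from each path and compare it
     * against the first readable copy.  Returns 1 on a mismatch. */
    static int check_row(char **paths, int n, long row)
    {
        char first[BLOCKSIZE], buf[BLOCKSIZE];
        int have_first = 0, i, rc = 0;

        for (i = 0; i < n; i++) {
            FILE *f = fopen(paths[i], "rb");
            if (!f)
                continue;               /* skip a non-operational mirror */
            if (fseek(f, row * BLOCKSIZE, SEEK_SET) == 0 &&
                fread(buf, 1, BLOCKSIZE, f) == (size_t)BLOCKSIZE) {
                if (!have_first) {
                    memcpy(first, buf, BLOCKSIZE);
                    have_first = 1;
                } else if (memcmp(first, buf, BLOCKSIZE) != 0) {
                    rc = 1;             /* mirrors differ */
                }
            }
            fclose(f);
        }
        return rc;
    }

    int main(int argc, char **argv)
    {
        if (argc < 3) {
            fprintf(stderr, "usage: %s file1 file2 [...]\n", argv[0]);
            return 2;
        }
        printf("block 0 %s\n", check_row(argv + 1, argc - 1, 0)
               ? "differs between mirrors" : "is consistent");
        return 0;
    }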
10535 | -static int raid1_run (int minor, struct md_dev *mddev) | |
10536 | +#define INVALID_LEVEL KERN_WARNING \ | |
10537 | +"raid1: md%d: raid level not set to mirroring (%d)\n" | |
10538 | + | |
10539 | +#define NO_SB KERN_ERR \ | |
10540 | +"raid1: disabled mirror %s (couldn't access raid superblock)\n" | |
10541 | + | |
10542 | +#define ERRORS KERN_ERR \ | |
10543 | +"raid1: disabled mirror %s (errors detected)\n" | |
10544 | + | |
10545 | +#define NOT_IN_SYNC KERN_ERR \ | |
10546 | +"raid1: disabled mirror %s (not in sync)\n" | |
10547 | + | |
10548 | +#define INCONSISTENT KERN_ERR \ | |
10549 | +"raid1: disabled mirror %s (inconsistent descriptor)\n" | |
10550 | + | |
10551 | +#define ALREADY_RUNNING KERN_ERR \ | |
10552 | +"raid1: disabled mirror %s (mirror %d already operational)\n" | |
10553 | + | |
10554 | +#define OPERATIONAL KERN_INFO \ | |
10555 | +"raid1: device %s operational as mirror %d\n" | |
10556 | + | |
10557 | +#define MEM_ERROR KERN_ERR \ | |
10558 | +"raid1: couldn't allocate memory for md%d\n" | |
10559 | + | |
10560 | +#define SPARE KERN_INFO \ | |
10561 | +"raid1: spare disk %s\n" | |
10562 | + | |
10563 | +#define NONE_OPERATIONAL KERN_ERR \ | |
10564 | +"raid1: no operational mirrors for md%d\n" | |
10565 | + | |
10566 | +#define RUNNING_CKRAID KERN_ERR \ | |
10567 | +"raid1: detected mirror differences -- running resync\n" | |
10568 | + | |
10569 | +#define ARRAY_IS_ACTIVE KERN_INFO \ | |
10570 | +"raid1: raid set md%d active with %d out of %d mirrors\n" | |
10571 | + | |
10572 | +#define THREAD_ERROR KERN_ERR \ | |
10573 | +"raid1: couldn't allocate thread for md%d\n" | |
10574 | + | |
10575 | +#define START_RESYNC KERN_WARNING \ | |
10576 | +"raid1: raid set md%d not clean; reconstructing mirrors\n" | |
10577 | + | |
10578 | +static int raid1_run (mddev_t *mddev) | |
10579 | { | |
10580 | - struct raid1_data *raid_conf; | |
10581 | - int i, j, raid_disk; | |
10582 | - md_superblock_t *sb = mddev->sb; | |
10583 | - md_descriptor_t *descriptor; | |
10584 | - struct real_dev *realdev; | |
10585 | + raid1_conf_t *conf; | |
10586 | + int i, j, disk_idx; | |
10587 | + struct mirror_info *disk; | |
10588 | + mdp_super_t *sb = mddev->sb; | |
10589 | + mdp_disk_t *descriptor; | |
10590 | + mdk_rdev_t *rdev; | |
10591 | + struct md_list_head *tmp; | |
10592 | + int start_recovery = 0; | |
10593 | ||
10594 | MOD_INC_USE_COUNT; | |
10595 | ||
10596 | if (sb->level != 1) { | |
10597 | - printk("raid1: %s: raid level not set to mirroring (%d)\n", | |
10598 | - kdevname(MKDEV(MD_MAJOR, minor)), sb->level); | |
10599 | - MOD_DEC_USE_COUNT; | |
10600 | - return -EIO; | |
10601 | - } | |
10602 | - /**** | |
10603 | - * copy the now verified devices into our private RAID1 bookkeeping | |
10604 | - * area. [whatever we allocate in raid1_run(), should be freed in | |
10605 | - * raid1_stop()] | |
10606 | + printk(INVALID_LEVEL, mdidx(mddev), sb->level); | |
10607 | + goto out; | |
10608 | + } | |
10609 | + /* | |
10610 | + * copy the already verified devices into our private RAID1 | |
10611 | + * bookkeeping area. [whatever we allocate in raid1_run(), | |
10612 | + * should be freed in raid1_stop()] | |
10613 | */ | |
10614 | ||
10615 | - while (!( /* FIXME: now we are rather fault tolerant than nice */ | |
10616 | - mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL) | |
10617 | - ) ) | |
10618 | - { | |
10619 | - printk ("raid1_run(): out of memory\n"); | |
10620 | - current->policy |= SCHED_YIELD; | |
10621 | - schedule(); | |
10622 | - } | |
10623 | - raid_conf = mddev->private; | |
10624 | - memset(raid_conf, 0, sizeof(*raid_conf)); | |
10625 | - | |
10626 | - PRINTK(("raid1_run(%d) called.\n", minor)); | |
10627 | - | |
10628 | - for (i = 0; i < mddev->nb_dev; i++) { | |
10629 | - realdev = &mddev->devices[i]; | |
10630 | - if (!realdev->sb) { | |
10631 | - printk(KERN_ERR "raid1: disabled mirror %s (couldn't access raid superblock)\n", kdevname(realdev->dev)); | |
10632 | + conf = raid1_kmalloc(sizeof(raid1_conf_t)); | |
10633 | + mddev->private = conf; | |
10634 | + if (!conf) { | |
10635 | + printk(MEM_ERROR, mdidx(mddev)); | |
10636 | + goto out; | |
10637 | + } | |
10638 | + | |
10639 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
10640 | + if (rdev->faulty) { | |
10641 | + printk(ERRORS, partition_name(rdev->dev)); | |
10642 | + } else { | |
10643 | + if (!rdev->sb) { | |
10644 | + MD_BUG(); | |
10645 | + continue; | |
10646 | + } | |
10647 | + } | |
10648 | + if (rdev->desc_nr == -1) { | |
10649 | + MD_BUG(); | |
10650 | continue; | |
10651 | } | |
10652 | - | |
10653 | - /* | |
10654 | - * This is important -- we are using the descriptor on | |
10655 | - * the disk only to get a pointer to the descriptor on | |
10656 | - * the main superblock, which might be more recent. | |
10657 | - */ | |
10658 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
10659 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) { | |
10660 | - printk(KERN_ERR "raid1: disabled mirror %s (errors detected)\n", kdevname(realdev->dev)); | |
10661 | + descriptor = &sb->disks[rdev->desc_nr]; | |
10662 | + disk_idx = descriptor->raid_disk; | |
10663 | + disk = conf->mirrors + disk_idx; | |
10664 | + | |
10665 | + if (disk_faulty(descriptor)) { | |
10666 | + disk->number = descriptor->number; | |
10667 | + disk->raid_disk = disk_idx; | |
10668 | + disk->dev = rdev->dev; | |
10669 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
10670 | + disk->operational = 0; | |
10671 | + disk->write_only = 0; | |
10672 | + disk->spare = 0; | |
10673 | + disk->used_slot = 1; | |
10674 | continue; | |
10675 | } | |
10676 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) { | |
10677 | - if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) { | |
10678 | - printk(KERN_ERR "raid1: disabled mirror %s (not in sync)\n", kdevname(realdev->dev)); | |
10679 | + if (disk_active(descriptor)) { | |
10680 | + if (!disk_sync(descriptor)) { | |
10681 | + printk(NOT_IN_SYNC, | |
10682 | + partition_name(rdev->dev)); | |
10683 | continue; | |
10684 | } | |
10685 | - raid_disk = descriptor->raid_disk; | |
10686 | - if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) { | |
10687 | - printk(KERN_ERR "raid1: disabled mirror %s (inconsistent descriptor)\n", kdevname(realdev->dev)); | |
10688 | + if ((descriptor->number > MD_SB_DISKS) || | |
10689 | + (disk_idx > sb->raid_disks)) { | |
10690 | + | |
10691 | + printk(INCONSISTENT, | |
10692 | + partition_name(rdev->dev)); | |
10693 | continue; | |
10694 | } | |
10695 | - if (raid_conf->mirrors[raid_disk].operational) { | |
10696 | - printk(KERN_ERR "raid1: disabled mirror %s (mirror %d already operational)\n", kdevname(realdev->dev), raid_disk); | |
10697 | + if (disk->operational) { | |
10698 | + printk(ALREADY_RUNNING, | |
10699 | + partition_name(rdev->dev), | |
10700 | + disk_idx); | |
10701 | continue; | |
10702 | } | |
10703 | - printk(KERN_INFO "raid1: device %s operational as mirror %d\n", kdevname(realdev->dev), raid_disk); | |
10704 | - raid_conf->mirrors[raid_disk].number = descriptor->number; | |
10705 | - raid_conf->mirrors[raid_disk].raid_disk = raid_disk; | |
10706 | - raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev; | |
10707 | - raid_conf->mirrors[raid_disk].operational = 1; | |
10708 | - raid_conf->mirrors[raid_disk].sect_limit = 128; | |
10709 | - raid_conf->working_disks++; | |
10710 | + printk(OPERATIONAL, partition_name(rdev->dev), | |
10711 | + disk_idx); | |
10712 | + disk->number = descriptor->number; | |
10713 | + disk->raid_disk = disk_idx; | |
10714 | + disk->dev = rdev->dev; | |
10715 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
10716 | + disk->operational = 1; | |
10717 | + disk->write_only = 0; | |
10718 | + disk->spare = 0; | |
10719 | + disk->used_slot = 1; | |
10720 | + conf->working_disks++; | |
10721 | } else { | |
10722 | /* | |
10723 | * Must be a spare disk .. | |
10724 | */ | |
10725 | - printk(KERN_INFO "raid1: spare disk %s\n", kdevname(realdev->dev)); | |
10726 | - raid_disk = descriptor->raid_disk; | |
10727 | - raid_conf->mirrors[raid_disk].number = descriptor->number; | |
10728 | - raid_conf->mirrors[raid_disk].raid_disk = raid_disk; | |
10729 | - raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev; | |
10730 | - raid_conf->mirrors[raid_disk].sect_limit = 128; | |
10731 | - | |
10732 | - raid_conf->mirrors[raid_disk].operational = 0; | |
10733 | - raid_conf->mirrors[raid_disk].write_only = 0; | |
10734 | - raid_conf->mirrors[raid_disk].spare = 1; | |
10735 | - } | |
10736 | - } | |
10737 | - if (!raid_conf->working_disks) { | |
10738 | - printk(KERN_ERR "raid1: no operational mirrors for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
10739 | - kfree(raid_conf); | |
10740 | - mddev->private = NULL; | |
10741 | - MOD_DEC_USE_COUNT; | |
10742 | - return -EIO; | |
10743 | - } | |
10744 | - | |
10745 | - raid_conf->raid_disks = sb->raid_disks; | |
10746 | - raid_conf->mddev = mddev; | |
10747 | - | |
10748 | - for (j = 0; !raid_conf->mirrors[j].operational; j++); | |
10749 | - raid_conf->last_used = j; | |
10750 | - for (i = raid_conf->raid_disks - 1; i >= 0; i--) { | |
10751 | - if (raid_conf->mirrors[i].operational) { | |
10752 | - PRINTK(("raid_conf->mirrors[%d].next == %d\n", i, j)); | |
10753 | - raid_conf->mirrors[i].next = j; | |
10754 | + printk(SPARE, partition_name(rdev->dev)); | |
10755 | + disk->number = descriptor->number; | |
10756 | + disk->raid_disk = disk_idx; | |
10757 | + disk->dev = rdev->dev; | |
10758 | + disk->sect_limit = MAX_LINEAR_SECTORS; | |
10759 | + disk->operational = 0; | |
10760 | + disk->write_only = 0; | |
10761 | + disk->spare = 1; | |
10762 | + disk->used_slot = 1; | |
10763 | + } | |
10764 | + } | |
10765 | + if (!conf->working_disks) { | |
10766 | + printk(NONE_OPERATIONAL, mdidx(mddev)); | |
10767 | + goto out_free_conf; | |
10768 | + } | |
10769 | + | |
10770 | + conf->raid_disks = sb->raid_disks; | |
10771 | + conf->nr_disks = sb->nr_disks; | |
10772 | + conf->mddev = mddev; | |
10773 | + | |
10774 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10775 | + | |
10776 | + descriptor = sb->disks+i; | |
10777 | + disk_idx = descriptor->raid_disk; | |
10778 | + disk = conf->mirrors + disk_idx; | |
10779 | + | |
10780 | + if (disk_faulty(descriptor) && (disk_idx < conf->raid_disks) && | |
10781 | + !disk->used_slot) { | |
10782 | + | |
10783 | + disk->number = descriptor->number; | |
10784 | + disk->raid_disk = disk_idx; | |
10785 | + disk->dev = MKDEV(0,0); | |
10786 | + | |
10787 | + disk->operational = 0; | |
10788 | + disk->write_only = 0; | |
10789 | + disk->spare = 0; | |
10790 | + disk->used_slot = 1; | |
10791 | + } | |
10792 | + } | |
10793 | + | |
10794 | + /* | |
10795 | + * find the first working one and use it as a starting point | |
10796 | + * to read balancing. | |
10797 | + */ | |
10798 | + for (j = 0; !conf->mirrors[j].operational; j++) | |
10799 | + /* nothing */; | |
10800 | + conf->last_used = j; | |
10801 | + | |
10802 | + /* | |
10803 | + * initialize the 'working disks' list. | |
10804 | + */ | |
10805 | + for (i = conf->raid_disks - 1; i >= 0; i--) { | |
10806 | + if (conf->mirrors[i].operational) { | |
10807 | + conf->mirrors[i].next = j; | |
10808 | j = i; | |
10809 | } | |
10810 | } | |
10811 | ||
10812 | - if (check_consistency(mddev)) { | |
10813 | - printk(KERN_ERR "raid1: detected mirror differences -- run ckraid\n"); | |
10814 | - sb->state |= 1 << MD_SB_ERRORS; | |
10815 | - kfree(raid_conf); | |
10816 | - mddev->private = NULL; | |
10817 | - MOD_DEC_USE_COUNT; | |
10818 | - return -EIO; | |
10819 | + if (conf->working_disks != sb->raid_disks) { | |
10820 | + printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); | |
10821 | + start_recovery = 1; | |
10822 | } | |
10823 | ||
10824 | + if (!start_recovery && (sb->state & (1 << MD_SB_CLEAN))) { | |
10825 | + /* | |
10826 | + * we do sanity checks even if the device says | |
10827 | + * it's clean ... | |
10828 | + */ | |
10829 | + if (check_consistency(mddev)) { | |
10830 | + printk(RUNNING_CKRAID); | |
10831 | + sb->state &= ~(1 << MD_SB_CLEAN); | |
10832 | + } | |
10833 | + } | |
10834 | + | |
10835 | + { | |
10836 | + const char * name = "raid1d"; | |
10837 | + | |
10838 | + conf->thread = md_register_thread(raid1d, conf, name); | |
10839 | + if (!conf->thread) { | |
10840 | + printk(THREAD_ERROR, mdidx(mddev)); | |
10841 | + goto out_free_conf; | |
10842 | + } | |
10843 | + } | |
10844 | + | |
10845 | + if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN))) { | |
10846 | + const char * name = "raid1syncd"; | |
10847 | + | |
10848 | + conf->resync_thread = md_register_thread(raid1syncd, conf,name); | |
10849 | + if (!conf->resync_thread) { | |
10850 | + printk(THREAD_ERROR, mdidx(mddev)); | |
10851 | + goto out_free_conf; | |
10852 | + } | |
10853 | + | |
10854 | + printk(START_RESYNC, mdidx(mddev)); | |
10855 | + conf->resync_mirrors = 1; | |
10856 | + md_wakeup_thread(conf->resync_thread); | |
10857 | + } | |
10858 | + | |
10859 | /* | |
10860 | * Regenerate the "device is in sync with the raid set" bit for | |
10861 | * each device. | |
10862 | */ | |
10863 | - for (i = 0; i < sb->nr_disks ; i++) { | |
10864 | - sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE); | |
10865 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
10866 | + mark_disk_nonsync(sb->disks+i); | |
10867 | for (j = 0; j < sb->raid_disks; j++) { | |
10868 | - if (!raid_conf->mirrors[j].operational) | |
10869 | + if (!conf->mirrors[j].operational) | |
10870 | continue; | |
10871 | - if (sb->disks[i].number == raid_conf->mirrors[j].number) | |
10872 | - sb->disks[i].state |= 1 << MD_SYNC_DEVICE; | |
10873 | + if (sb->disks[i].number == conf->mirrors[j].number) | |
10874 | + mark_disk_sync(sb->disks+i); | |
10875 | } | |
10876 | } | |
10877 | - sb->active_disks = raid_conf->working_disks; | |
10878 | + sb->active_disks = conf->working_disks; | |
10879 | ||
10880 | - printk("raid1: raid set %s active with %d out of %d mirrors\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks); | |
10881 | - /* Ok, everything is just fine now */ | |
10882 | - return (0); | |
10883 | + if (start_recovery) | |
10884 | + md_recover_arrays(); | |
10885 | + | |
10886 | + | |
10887 | + printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks); | |
10888 | + /* | |
10889 | + * Ok, everything is just fine now | |
10890 | + */ | |
10891 | + return 0; | |
10892 | + | |
10893 | +out_free_conf: | |
10894 | + kfree(conf); | |
10895 | + mddev->private = NULL; | |
10896 | +out: | |
10897 | + MOD_DEC_USE_COUNT; | |
10898 | + return -EIO; | |
10899 | +} | |
10900 | + | |
10901 | +#undef INVALID_LEVEL | |
10902 | +#undef NO_SB | |
10903 | +#undef ERRORS | |
10904 | +#undef NOT_IN_SYNC | |
10905 | +#undef INCONSISTENT | |
10906 | +#undef ALREADY_RUNNING | |
10907 | +#undef OPERATIONAL | |
10908 | +#undef SPARE | |
10909 | +#undef NONE_OPERATIONAL | |
10910 | +#undef RUNNING_CKRAID | |
10911 | +#undef ARRAY_IS_ACTIVE | |
10912 | + | |
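raid1_run() walks the rdevs and classifies each descriptor: faulty disks keep a placeholder slot, active-and-sync disks become operational mirrors, active-but-not-sync disks are skipped, and everything else is treated as a spare. It then refuses to start with no operational mirror, kicks off recovery when the array is degraded, and schedules a full resync when the superblock was not marked clean. The classification and the resulting decision, reduced to a small sketch with made-up descriptor states:

    #include <stdio.h>

    enum state { FAULTY, ACTIVE_SYNC, ACTIVE_NOT_SYNC, SPARE };

    int main(void)
    {
        /* made-up states for a 2-disk mirror set plus one spare */
        enum state disks[] = { ACTIVE_SYNC, FAULTY, SPARE };
        int raid_disks = 2, working = 0, spares = 0, i;

        for (i = 0; i < (int)(sizeof(disks)/sizeof(disks[0])); i++) {
            switch (disks[i]) {
            case ACTIVE_SYNC:     working++; break;  /* usable mirror */
            case ACTIVE_NOT_SYNC: break;  /* skipped, like NOT_IN_SYNC */
            case FAULTY:          break;
            case SPARE:           spares++; break;
            }
        }

        if (!working)
            printf("no operational mirrors - refuse to start\n");
        else if (working != raid_disks)
            printf("degraded (%d/%d) - start recovery using %d spare(s)\n",
                   working, raid_disks, spares);
        else
            printf("all %d mirrors operational\n", raid_disks);
        return 0;
    }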
10913 | +static int raid1_stop_resync (mddev_t *mddev) | |
10914 | +{ | |
10915 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
10916 | + | |
10917 | + if (conf->resync_thread) { | |
10918 | + if (conf->resync_mirrors) { | |
10919 | + conf->resync_mirrors = 2; | |
10920 | + md_interrupt_thread(conf->resync_thread); | |
10921 | + printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n"); | |
10922 | + return 1; | |
10923 | + } | |
10924 | + return 0; | |
10925 | + } | |
10926 | + return 0; | |
10927 | +} | |
10928 | + | |
10929 | +static int raid1_restart_resync (mddev_t *mddev) | |
10930 | +{ | |
10931 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
10932 | + | |
10933 | + if (conf->resync_mirrors) { | |
10934 | + if (!conf->resync_thread) { | |
10935 | + MD_BUG(); | |
10936 | + return 0; | |
10937 | + } | |
10938 | + conf->resync_mirrors = 1; | |
10939 | + md_wakeup_thread(conf->resync_thread); | |
10940 | + return 1; | |
10941 | + } | |
10942 | + return 0; | |
10943 | } | |
10944 | ||
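Taken together, raid1_run(), raid1syncd(), raid1_stop_resync() and raid1_restart_resync() drive the resync through a single three-valued flag. A summary of the values as an annotated declaration (the enumerator names are descriptive, not taken from the driver):

    /* Illustrative summary of the conf->resync_mirrors values used above: */
    enum resync_state {
        RESYNC_NONE    = 0,  /* array is clean, raid1syncd exits at once    */
        RESYNC_NEEDED  = 1,  /* raid1syncd runs md_do_sync(); cleared back  */
                             /* to 0 only if the sync finished successfully */
        RESYNC_ABORTED = 2   /* set by raid1_stop_resync(); raid1syncd      */
                             /* bails out and the resync is retried on the  */
                             /* next run of the array                       */
    };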
10945 | -static int raid1_stop (int minor, struct md_dev *mddev) | |
10946 | +static int raid1_stop (mddev_t *mddev) | |
10947 | { | |
10948 | - struct raid1_data *raid_conf = (struct raid1_data *) mddev->private; | |
10949 | + raid1_conf_t *conf = mddev_to_conf(mddev); | |
10950 | ||
10951 | - kfree (raid_conf); | |
10952 | + md_unregister_thread(conf->thread); | |
10953 | + if (conf->resync_thread) | |
10954 | + md_unregister_thread(conf->resync_thread); | |
10955 | + kfree(conf); | |
10956 | mddev->private = NULL; | |
10957 | MOD_DEC_USE_COUNT; | |
10958 | return 0; | |
10959 | } | |
10960 | ||
10961 | -static struct md_personality raid1_personality= | |
10962 | +static mdk_personality_t raid1_personality= | |
10963 | { | |
10964 | "raid1", | |
10965 | raid1_map, | |
10966 | @@ -855,15 +1214,13 @@ | |
10967 | NULL, /* no ioctls */ | |
10968 | 0, | |
10969 | raid1_error, | |
10970 | - raid1_hot_add_disk, | |
10971 | - /* raid1_hot_remove_drive */ NULL, | |
10972 | - raid1_mark_spare | |
10973 | + raid1_diskop, | |
10974 | + raid1_stop_resync, | |
10975 | + raid1_restart_resync | |
10976 | }; | |
10977 | ||
10978 | int raid1_init (void) | |
10979 | { | |
10980 | - if ((raid1_thread = md_register_thread(raid1d, NULL)) == NULL) | |
10981 | - return -EBUSY; | |
10982 | return register_md_personality (RAID1, &raid1_personality); | |
10983 | } | |
10984 | ||
10985 | @@ -875,7 +1232,6 @@ | |
10986 | ||
10987 | void cleanup_module (void) | |
10988 | { | |
10989 | - md_unregister_thread (raid1_thread); | |
10990 | unregister_md_personality (RAID1); | |
10991 | } | |
10992 | #endif | |
10993 | --- linux/drivers/block/raid5.c.orig Fri May 8 09:17:13 1998 | |
10994 | +++ linux/drivers/block/raid5.c Fri Nov 23 11:18:20 2001 | |
10995 | @@ -1,4 +1,4 @@ | |
10996 | -/***************************************************************************** | |
10997 | +/* | |
10998 | * raid5.c : Multiple Devices driver for Linux | |
10999 | * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
11000 | * | |
11001 | @@ -14,16 +14,15 @@ | |
11002 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
11003 | */ | |
11004 | ||
11005 | + | |
11006 | #include <linux/module.h> | |
11007 | #include <linux/locks.h> | |
11008 | #include <linux/malloc.h> | |
11009 | -#include <linux/md.h> | |
11010 | -#include <linux/raid5.h> | |
11011 | +#include <linux/raid/raid5.h> | |
11012 | #include <asm/bitops.h> | |
11013 | #include <asm/atomic.h> | |
11014 | -#include <asm/md.h> | |
11015 | ||
11016 | -static struct md_personality raid5_personality; | |
11017 | +static mdk_personality_t raid5_personality; | |
11018 | ||
11019 | /* | |
11020 | * Stripe cache | |
11021 | @@ -33,7 +32,7 @@ | |
11022 | #define HASH_PAGES_ORDER 0 | |
11023 | #define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) | |
11024 | #define HASH_MASK (NR_HASH - 1) | |
11025 | -#define stripe_hash(raid_conf, sect, size) ((raid_conf)->stripe_hashtbl[((sect) / (size >> 9)) & HASH_MASK]) | |
11026 | +#define stripe_hash(conf, sect, size) ((conf)->stripe_hashtbl[((sect) / (size >> 9)) & HASH_MASK]) | |
11027 | ||
11028 | /* | |
11029 | * The following can be used to debug the driver | |
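The stripe cache hashes stripes by sector: the sector number is divided by the stripe size in sectors (size >> 9 converts the byte size to 512-byte sectors) and masked into one of NR_HASH buckets. A worked example of the bucket computation, assuming one 4 KB page of 4-byte pointers so that NR_HASH is 1024 (the real value depends on PAGE_SIZE and the pointer size):

    #include <stdio.h>

    #define NR_HASH   1024          /* assumption for this example */
    #define HASH_MASK (NR_HASH - 1)

    static int stripe_bucket(unsigned long sector, int size)
    {
        /* size is in bytes, sector in 512-byte units; size >> 9 is the
         * stripe size in sectors, so consecutive stripes land in
         * consecutive buckets */
        return (sector / (size >> 9)) & HASH_MASK;
    }

    int main(void)
    {
        printf("%d\n", stripe_bucket(123456, 4096));   /* prints 72 */
        return 0;
    }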
11030 | @@ -46,6 +45,8 @@ | |
11031 | #define PRINTK(x) do { ; } while (0) | |
11032 | #endif | |
11033 | ||
11034 | +static void print_raid5_conf (raid5_conf_t *conf); | |
11035 | + | |
11036 | static inline int stripe_locked(struct stripe_head *sh) | |
11037 | { | |
11038 | return test_bit(STRIPE_LOCKED, &sh->state); | |
11039 | @@ -61,32 +62,32 @@ | |
11040 | */ | |
11041 | static inline void lock_stripe(struct stripe_head *sh) | |
11042 | { | |
11043 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11044 | - if (!test_and_set_bit(STRIPE_LOCKED, &sh->state)) { | |
11045 | + raid5_conf_t *conf = sh->raid_conf; | |
11046 | + if (!md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) { | |
11047 | PRINTK(("locking stripe %lu\n", sh->sector)); | |
11048 | - raid_conf->nr_locked_stripes++; | |
11049 | + conf->nr_locked_stripes++; | |
11050 | } | |
11051 | } | |
11052 | ||
11053 | static inline void unlock_stripe(struct stripe_head *sh) | |
11054 | { | |
11055 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11056 | - if (test_and_clear_bit(STRIPE_LOCKED, &sh->state)) { | |
11057 | + raid5_conf_t *conf = sh->raid_conf; | |
11058 | + if (md_test_and_clear_bit(STRIPE_LOCKED, &sh->state)) { | |
11059 | PRINTK(("unlocking stripe %lu\n", sh->sector)); | |
11060 | - raid_conf->nr_locked_stripes--; | |
11061 | + conf->nr_locked_stripes--; | |
11062 | wake_up(&sh->wait); | |
11063 | } | |
11064 | } | |
11065 | ||
11066 | static inline void finish_stripe(struct stripe_head *sh) | |
11067 | { | |
11068 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11069 | + raid5_conf_t *conf = sh->raid_conf; | |
11070 | unlock_stripe(sh); | |
11071 | sh->cmd = STRIPE_NONE; | |
11072 | sh->phase = PHASE_COMPLETE; | |
11073 | - raid_conf->nr_pending_stripes--; | |
11074 | - raid_conf->nr_cached_stripes++; | |
11075 | - wake_up(&raid_conf->wait_for_stripe); | |
11076 | + conf->nr_pending_stripes--; | |
11077 | + conf->nr_cached_stripes++; | |
11078 | + wake_up(&conf->wait_for_stripe); | |
11079 | } | |
11080 | ||
11081 | void __wait_on_stripe(struct stripe_head *sh) | |
11082 | @@ -114,7 +115,7 @@ | |
11083 | __wait_on_stripe(sh); | |
11084 | } | |
11085 | ||
11086 | -static inline void remove_hash(struct raid5_data *raid_conf, struct stripe_head *sh) | |
11087 | +static inline void remove_hash(raid5_conf_t *conf, struct stripe_head *sh) | |
11088 | { | |
11089 | PRINTK(("remove_hash(), stripe %lu\n", sh->sector)); | |
11090 | ||
11091 | @@ -123,21 +124,22 @@ | |
11092 | sh->hash_next->hash_pprev = sh->hash_pprev; | |
11093 | *sh->hash_pprev = sh->hash_next; | |
11094 | sh->hash_pprev = NULL; | |
11095 | - raid_conf->nr_hashed_stripes--; | |
11096 | + conf->nr_hashed_stripes--; | |
11097 | } | |
11098 | } | |
11099 | ||
11100 | -static inline void insert_hash(struct raid5_data *raid_conf, struct stripe_head *sh) | |
11101 | +static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) | |
11102 | { | |
11103 | - struct stripe_head **shp = &stripe_hash(raid_conf, sh->sector, sh->size); | |
11104 | + struct stripe_head **shp = &stripe_hash(conf, sh->sector, sh->size); | |
11105 | ||
11106 | - PRINTK(("insert_hash(), stripe %lu, nr_hashed_stripes %d\n", sh->sector, raid_conf->nr_hashed_stripes)); | |
11107 | + PRINTK(("insert_hash(), stripe %lu, nr_hashed_stripes %d\n", | |
11108 | + sh->sector, conf->nr_hashed_stripes)); | |
11109 | ||
11110 | if ((sh->hash_next = *shp) != NULL) | |
11111 | (*shp)->hash_pprev = &sh->hash_next; | |
11112 | *shp = sh; | |
11113 | sh->hash_pprev = shp; | |
11114 | - raid_conf->nr_hashed_stripes++; | |
11115 | + conf->nr_hashed_stripes++; | |
11116 | } | |
11117 | ||
11118 | static struct buffer_head *get_free_buffer(struct stripe_head *sh, int b_size) | |
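remove_hash()/insert_hash() use the "pointer to the previous next-pointer" layout: hash_pprev holds the address of whatever pointer currently points at this stripe, either the bucket head or the previous element's hash_next, so an element can unlink itself without knowing whether it is first in its bucket. A minimal user-space version of the same insert/remove pair (names simplified):

    #include <stdio.h>
    #include <stddef.h>

    struct node {
        int sector;
        struct node *next;
        struct node **pprev;  /* address of the pointer pointing at us */
    };

    static void insert_head(struct node **head, struct node *n)
    {
        if ((n->next = *head) != NULL)
            (*head)->pprev = &n->next;
        *head = n;
        n->pprev = head;
    }

    static void remove_node(struct node *n)
    {
        if (n->pprev) {
            if (n->next)
                n->next->pprev = n->pprev;
            *n->pprev = n->next;
            n->pprev = NULL;          /* mark as unhashed */
        }
    }

    int main(void)
    {
        struct node *head = NULL;
        struct node a = { 8, NULL, NULL }, b = { 16, NULL, NULL };

        insert_head(&head, &a);
        insert_head(&head, &b);   /* bucket: b -> a */
        remove_node(&b);          /* same code whether b is first or not */

        printf("head sector = %d\n", head ? head->sector : -1);  /* 8 */
        return 0;
    }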
11119 | @@ -145,13 +147,15 @@ | |
11120 | struct buffer_head *bh; | |
11121 | unsigned long flags; | |
11122 | ||
11123 | - save_flags(flags); | |
11124 | - cli(); | |
11125 | - if ((bh = sh->buffer_pool) == NULL) | |
11126 | - return NULL; | |
11127 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
11128 | + bh = sh->buffer_pool; | |
11129 | + if (!bh) | |
11130 | + goto out_unlock; | |
11131 | sh->buffer_pool = bh->b_next; | |
11132 | bh->b_size = b_size; | |
11133 | - restore_flags(flags); | |
11134 | +out_unlock: | |
11135 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
11136 | + | |
11137 | return bh; | |
11138 | } | |
11139 | ||
11140 | @@ -160,12 +164,14 @@ | |
11141 | struct buffer_head *bh; | |
11142 | unsigned long flags; | |
11143 | ||
11144 | - save_flags(flags); | |
11145 | - cli(); | |
11146 | - if ((bh = sh->bh_pool) == NULL) | |
11147 | - return NULL; | |
11148 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
11149 | + bh = sh->bh_pool; | |
11150 | + if (!bh) | |
11151 | + goto out_unlock; | |
11152 | sh->bh_pool = bh->b_next; | |
11153 | - restore_flags(flags); | |
11154 | +out_unlock: | |
11155 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
11156 | + | |
11157 | return bh; | |
11158 | } | |
11159 | ||
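These hunks replace the old save_flags()/cli() critical sections with a per-stripe spinlock and a single out_unlock exit, so every path drops the lock exactly once. The underlying pattern, popping the head of a free list under a lock and leaving through one unlock label, in plain user-space C with a pthread mutex in place of the irq-safe spinlock:

    #include <stdio.h>
    #include <pthread.h>

    struct buf { struct buf *next; };

    static struct buf *pool;
    static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct buf *get_free_buf(void)
    {
        struct buf *b;

        pthread_mutex_lock(&pool_lock);
        b = pool;
        if (!b)
            goto out_unlock;  /* empty pool: leave with the lock released */
        pool = b->next;
    out_unlock:
        pthread_mutex_unlock(&pool_lock);
        return b;
    }

    int main(void)
    {
        struct buf a = { NULL }, c = { &a };
        pool = &c;

        struct buf *x = get_free_buf();
        struct buf *y = get_free_buf();
        struct buf *z = get_free_buf();   /* pool is now empty */

        printf("%p %p %p\n", (void *)x, (void *)y, (void *)z);
        /* the third pointer prints as (nil): the pool was exhausted */
        return 0;
    }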
11160 | @@ -173,54 +179,52 @@ | |
11161 | { | |
11162 | unsigned long flags; | |
11163 | ||
11164 | - save_flags(flags); | |
11165 | - cli(); | |
11166 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
11167 | bh->b_next = sh->buffer_pool; | |
11168 | sh->buffer_pool = bh; | |
11169 | - restore_flags(flags); | |
11170 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
11171 | } | |
11172 | ||
11173 | static void put_free_bh(struct stripe_head *sh, struct buffer_head *bh) | |
11174 | { | |
11175 | unsigned long flags; | |
11176 | ||
11177 | - save_flags(flags); | |
11178 | - cli(); | |
11179 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
11180 | bh->b_next = sh->bh_pool; | |
11181 | sh->bh_pool = bh; | |
11182 | - restore_flags(flags); | |
11183 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
11184 | } | |
11185 | ||
11186 | -static struct stripe_head *get_free_stripe(struct raid5_data *raid_conf) | |
11187 | +static struct stripe_head *get_free_stripe(raid5_conf_t *conf) | |
11188 | { | |
11189 | struct stripe_head *sh; | |
11190 | unsigned long flags; | |
11191 | ||
11192 | save_flags(flags); | |
11193 | cli(); | |
11194 | - if ((sh = raid_conf->free_sh_list) == NULL) { | |
11195 | + if ((sh = conf->free_sh_list) == NULL) { | |
11196 | restore_flags(flags); | |
11197 | return NULL; | |
11198 | } | |
11199 | - raid_conf->free_sh_list = sh->free_next; | |
11200 | - raid_conf->nr_free_sh--; | |
11201 | - if (!raid_conf->nr_free_sh && raid_conf->free_sh_list) | |
11202 | + conf->free_sh_list = sh->free_next; | |
11203 | + conf->nr_free_sh--; | |
11204 | + if (!conf->nr_free_sh && conf->free_sh_list) | |
11205 | printk ("raid5: bug: free_sh_list != NULL, nr_free_sh == 0\n"); | |
11206 | restore_flags(flags); | |
11207 | - if (sh->hash_pprev || sh->nr_pending || sh->count) | |
11208 | + if (sh->hash_pprev || md_atomic_read(&sh->nr_pending) || sh->count) | |
11209 | printk("get_free_stripe(): bug\n"); | |
11210 | return sh; | |
11211 | } | |
11212 | ||
11213 | -static void put_free_stripe(struct raid5_data *raid_conf, struct stripe_head *sh) | |
11214 | +static void put_free_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |
11215 | { | |
11216 | unsigned long flags; | |
11217 | ||
11218 | save_flags(flags); | |
11219 | cli(); | |
11220 | - sh->free_next = raid_conf->free_sh_list; | |
11221 | - raid_conf->free_sh_list = sh; | |
11222 | - raid_conf->nr_free_sh++; | |
11223 | + sh->free_next = conf->free_sh_list; | |
11224 | + conf->free_sh_list = sh; | |
11225 | + conf->nr_free_sh++; | |
11226 | restore_flags(flags); | |
11227 | } | |
11228 | ||
11229 | @@ -324,8 +328,8 @@ | |
11230 | ||
11231 | static void kfree_stripe(struct stripe_head *sh) | |
11232 | { | |
11233 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11234 | - int disks = raid_conf->raid_disks, j; | |
11235 | + raid5_conf_t *conf = sh->raid_conf; | |
11236 | + int disks = conf->raid_disks, j; | |
11237 | ||
11238 | PRINTK(("kfree_stripe called, stripe %lu\n", sh->sector)); | |
11239 | if (sh->phase != PHASE_COMPLETE || stripe_locked(sh) || sh->count) { | |
11240 | @@ -338,19 +342,19 @@ | |
11241 | if (sh->bh_new[j] || sh->bh_copy[j]) | |
11242 | printk("raid5: bug: sector %lu, new %p, copy %p\n", sh->sector, sh->bh_new[j], sh->bh_copy[j]); | |
11243 | } | |
11244 | - remove_hash(raid_conf, sh); | |
11245 | - put_free_stripe(raid_conf, sh); | |
11246 | + remove_hash(conf, sh); | |
11247 | + put_free_stripe(conf, sh); | |
11248 | } | |
11249 | ||
11250 | -static int shrink_stripe_cache(struct raid5_data *raid_conf, int nr) | |
11251 | +static int shrink_stripe_cache(raid5_conf_t *conf, int nr) | |
11252 | { | |
11253 | struct stripe_head *sh; | |
11254 | int i, count = 0; | |
11255 | ||
11256 | - PRINTK(("shrink_stripe_cache called, %d/%d, clock %d\n", nr, raid_conf->nr_hashed_stripes, raid_conf->clock)); | |
11257 | + PRINTK(("shrink_stripe_cache called, %d/%d, clock %d\n", nr, conf->nr_hashed_stripes, conf->clock)); | |
11258 | for (i = 0; i < NR_HASH; i++) { | |
11259 | repeat: | |
11260 | - sh = raid_conf->stripe_hashtbl[(i + raid_conf->clock) & HASH_MASK]; | |
11261 | + sh = conf->stripe_hashtbl[(i + conf->clock) & HASH_MASK]; | |
11262 | for (; sh; sh = sh->hash_next) { | |
11263 | if (sh->phase != PHASE_COMPLETE) | |
11264 | continue; | |
11265 | @@ -360,30 +364,30 @@ | |
11266 | continue; | |
11267 | kfree_stripe(sh); | |
11268 | if (++count == nr) { | |
11269 | - PRINTK(("shrink completed, nr_hashed_stripes %d\n", raid_conf->nr_hashed_stripes)); | |
11270 | - raid_conf->clock = (i + raid_conf->clock) & HASH_MASK; | |
11271 | + PRINTK(("shrink completed, nr_hashed_stripes %d\n", conf->nr_hashed_stripes)); | |
11272 | + conf->clock = (i + conf->clock) & HASH_MASK; | |
11273 | return nr; | |
11274 | } | |
11275 | goto repeat; | |
11276 | } | |
11277 | } | |
11278 | - PRINTK(("shrink completed, nr_hashed_stripes %d\n", raid_conf->nr_hashed_stripes)); | |
11279 | + PRINTK(("shrink completed, nr_hashed_stripes %d\n", conf->nr_hashed_stripes)); | |
11280 | return count; | |
11281 | } | |
11282 | ||
11283 | -static struct stripe_head *find_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
11284 | +static struct stripe_head *find_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
11285 | { | |
11286 | struct stripe_head *sh; | |
11287 | ||
11288 | - if (raid_conf->buffer_size != size) { | |
11289 | - PRINTK(("switching size, %d --> %d\n", raid_conf->buffer_size, size)); | |
11290 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes); | |
11291 | - raid_conf->buffer_size = size; | |
11292 | + if (conf->buffer_size != size) { | |
11293 | + PRINTK(("switching size, %d --> %d\n", conf->buffer_size, size)); | |
11294 | + shrink_stripe_cache(conf, conf->max_nr_stripes); | |
11295 | + conf->buffer_size = size; | |
11296 | } | |
11297 | ||
11298 | PRINTK(("find_stripe, sector %lu\n", sector)); | |
11299 | - for (sh = stripe_hash(raid_conf, sector, size); sh; sh = sh->hash_next) | |
11300 | - if (sh->sector == sector && sh->raid_conf == raid_conf) { | |
11301 | + for (sh = stripe_hash(conf, sector, size); sh; sh = sh->hash_next) | |
11302 | + if (sh->sector == sector && sh->raid_conf == conf) { | |
11303 | if (sh->size == size) { | |
11304 | PRINTK(("found stripe %lu\n", sector)); | |
11305 | return sh; | |
11306 | @@ -397,7 +401,7 @@ | |
11307 | return NULL; | |
11308 | } | |
11309 | ||
11310 | -static int grow_stripes(struct raid5_data *raid_conf, int num, int priority) | |
11311 | +static int grow_stripes(raid5_conf_t *conf, int num, int priority) | |
11312 | { | |
11313 | struct stripe_head *sh; | |
11314 | ||
11315 | @@ -405,62 +409,64 @@ | |
11316 | if ((sh = kmalloc(sizeof(struct stripe_head), priority)) == NULL) | |
11317 | return 1; | |
11318 | memset(sh, 0, sizeof(*sh)); | |
11319 | - if (grow_buffers(sh, 2 * raid_conf->raid_disks, PAGE_SIZE, priority)) { | |
11320 | - shrink_buffers(sh, 2 * raid_conf->raid_disks); | |
11321 | + sh->stripe_lock = MD_SPIN_LOCK_UNLOCKED; | |
11322 | + | |
11323 | + if (grow_buffers(sh, 2 * conf->raid_disks, PAGE_SIZE, priority)) { | |
11324 | + shrink_buffers(sh, 2 * conf->raid_disks); | |
11325 | kfree(sh); | |
11326 | return 1; | |
11327 | } | |
11328 | - if (grow_bh(sh, raid_conf->raid_disks, priority)) { | |
11329 | - shrink_buffers(sh, 2 * raid_conf->raid_disks); | |
11330 | - shrink_bh(sh, raid_conf->raid_disks); | |
11331 | + if (grow_bh(sh, conf->raid_disks, priority)) { | |
11332 | + shrink_buffers(sh, 2 * conf->raid_disks); | |
11333 | + shrink_bh(sh, conf->raid_disks); | |
11334 | kfree(sh); | |
11335 | return 1; | |
11336 | } | |
11337 | - put_free_stripe(raid_conf, sh); | |
11338 | - raid_conf->nr_stripes++; | |
11339 | + put_free_stripe(conf, sh); | |
11340 | + conf->nr_stripes++; | |
11341 | } | |
11342 | return 0; | |
11343 | } | |
11344 | ||
11345 | -static void shrink_stripes(struct raid5_data *raid_conf, int num) | |
11346 | +static void shrink_stripes(raid5_conf_t *conf, int num) | |
11347 | { | |
11348 | struct stripe_head *sh; | |
11349 | ||
11350 | while (num--) { | |
11351 | - sh = get_free_stripe(raid_conf); | |
11352 | + sh = get_free_stripe(conf); | |
11353 | if (!sh) | |
11354 | break; | |
11355 | - shrink_buffers(sh, raid_conf->raid_disks * 2); | |
11356 | - shrink_bh(sh, raid_conf->raid_disks); | |
11357 | + shrink_buffers(sh, conf->raid_disks * 2); | |
11358 | + shrink_bh(sh, conf->raid_disks); | |
11359 | kfree(sh); | |
11360 | - raid_conf->nr_stripes--; | |
11361 | + conf->nr_stripes--; | |
11362 | } | |
11363 | } | |
11364 | ||
11365 | -static struct stripe_head *kmalloc_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
11366 | +static struct stripe_head *kmalloc_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
11367 | { | |
11368 | struct stripe_head *sh = NULL, *tmp; | |
11369 | struct buffer_head *buffer_pool, *bh_pool; | |
11370 | ||
11371 | PRINTK(("kmalloc_stripe called\n")); | |
11372 | ||
11373 | - while ((sh = get_free_stripe(raid_conf)) == NULL) { | |
11374 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes / 8); | |
11375 | - if ((sh = get_free_stripe(raid_conf)) != NULL) | |
11376 | + while ((sh = get_free_stripe(conf)) == NULL) { | |
11377 | + shrink_stripe_cache(conf, conf->max_nr_stripes / 8); | |
11378 | + if ((sh = get_free_stripe(conf)) != NULL) | |
11379 | break; | |
11380 | - if (!raid_conf->nr_pending_stripes) | |
11381 | + if (!conf->nr_pending_stripes) | |
11382 | printk("raid5: bug: nr_free_sh == 0, nr_pending_stripes == 0\n"); | |
11383 | - md_wakeup_thread(raid_conf->thread); | |
11384 | + md_wakeup_thread(conf->thread); | |
11385 | PRINTK(("waiting for some stripes to complete\n")); | |
11386 | - sleep_on(&raid_conf->wait_for_stripe); | |
11387 | + sleep_on(&conf->wait_for_stripe); | |
11388 | } | |
11389 | ||
11390 | /* | |
11391 | * The above might have slept, so perhaps another process | |
11392 | * already created the stripe for us.. | |
11393 | */ | |
11394 | - if ((tmp = find_stripe(raid_conf, sector, size)) != NULL) { | |
11395 | - put_free_stripe(raid_conf, sh); | |
11396 | + if ((tmp = find_stripe(conf, sector, size)) != NULL) { | |
11397 | + put_free_stripe(conf, sh); | |
11398 | wait_on_stripe(tmp); | |
11399 | return tmp; | |
11400 | } | |
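The hunk above relies on a classic find-or-create pattern: the allocation path may sleep, so the hash is searched again after waking, and the freshly obtained free stripe is handed back if another thread created the entry in the meantime. A minimal user-space sketch of that pattern (all names here are illustrative, not from the driver):

#include <pthread.h>
#include <stdlib.h>

struct entry { unsigned long sector; struct entry *next; };

static struct entry *cache;                        /* stand-in for the stripe hash */
static pthread_mutex_t lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  freed = PTHREAD_COND_INITIALIZER;

static struct entry *lookup(unsigned long sector)
{
	struct entry *e;
	for (e = cache; e; e = e->next)
		if (e->sector == sector)
			return e;
	return NULL;
}

struct entry *get_entry(unsigned long sector)
{
	struct entry *e, *n;

	pthread_mutex_lock(&lock);
	while (!(e = lookup(sector))) {
		n = malloc(sizeof(*n));
		if (!n) {
			/* Out of resources: wait, then re-check the cache --
			 * another thread may have created the entry while
			 * we were asleep, exactly as the comment above notes. */
			pthread_cond_wait(&freed, &lock);
			continue;
		}
		n->sector = sector;
		n->next = cache;
		cache = n;
		e = n;
	}
	pthread_mutex_unlock(&lock);
	return e;
}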
11401 | @@ -472,25 +478,25 @@ | |
11402 | sh->bh_pool = bh_pool; | |
11403 | sh->phase = PHASE_COMPLETE; | |
11404 | sh->cmd = STRIPE_NONE; | |
11405 | - sh->raid_conf = raid_conf; | |
11406 | + sh->raid_conf = conf; | |
11407 | sh->sector = sector; | |
11408 | sh->size = size; | |
11409 | - raid_conf->nr_cached_stripes++; | |
11410 | - insert_hash(raid_conf, sh); | |
11411 | + conf->nr_cached_stripes++; | |
11412 | + insert_hash(conf, sh); | |
11413 | } else printk("raid5: bug: kmalloc_stripe() == NULL\n"); | |
11414 | return sh; | |
11415 | } | |
11416 | ||
11417 | -static struct stripe_head *get_stripe(struct raid5_data *raid_conf, unsigned long sector, int size) | |
11418 | +static struct stripe_head *get_stripe(raid5_conf_t *conf, unsigned long sector, int size) | |
11419 | { | |
11420 | struct stripe_head *sh; | |
11421 | ||
11422 | PRINTK(("get_stripe, sector %lu\n", sector)); | |
11423 | - sh = find_stripe(raid_conf, sector, size); | |
11424 | + sh = find_stripe(conf, sector, size); | |
11425 | if (sh) | |
11426 | wait_on_stripe(sh); | |
11427 | else | |
11428 | - sh = kmalloc_stripe(raid_conf, sector, size); | |
11429 | + sh = kmalloc_stripe(conf, sector, size); | |
11430 | return sh; | |
11431 | } | |
11432 | ||
11433 | @@ -523,7 +529,7 @@ | |
11434 | bh->b_end_io(bh, uptodate); | |
11435 | if (!uptodate) | |
11436 | printk(KERN_ALERT "raid5: %s: unrecoverable I/O error for " | |
11437 | - "block %lu\n", kdevname(bh->b_dev), bh->b_blocknr); | |
11438 | + "block %lu\n", partition_name(bh->b_dev), bh->b_blocknr); | |
11439 | } | |
11440 | ||
11441 | static inline void raid5_mark_buffer_uptodate (struct buffer_head *bh, int uptodate) | |
11442 | @@ -537,36 +543,35 @@ | |
11443 | static void raid5_end_request (struct buffer_head * bh, int uptodate) | |
11444 | { | |
11445 | struct stripe_head *sh = bh->b_dev_id; | |
11446 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11447 | - int disks = raid_conf->raid_disks, i; | |
11448 | + raid5_conf_t *conf = sh->raid_conf; | |
11449 | + int disks = conf->raid_disks, i; | |
11450 | unsigned long flags; | |
11451 | ||
11452 | PRINTK(("end_request %lu, nr_pending %d\n", sh->sector, sh->nr_pending)); | |
11453 | - save_flags(flags); | |
11454 | - cli(); | |
11455 | + md_spin_lock_irqsave(&sh->stripe_lock, flags); | |
11456 | raid5_mark_buffer_uptodate(bh, uptodate); | |
11457 | - --sh->nr_pending; | |
11458 | - if (!sh->nr_pending) { | |
11459 | - md_wakeup_thread(raid_conf->thread); | |
11460 | - atomic_inc(&raid_conf->nr_handle); | |
11461 | + if (atomic_dec_and_test(&sh->nr_pending)) { | |
11462 | + md_wakeup_thread(conf->thread); | |
11463 | + atomic_inc(&conf->nr_handle); | |
11464 | } | |
11465 | - if (!uptodate) | |
11466 | + if (!uptodate) { | |
11467 | md_error(bh->b_dev, bh->b_rdev); | |
11468 | - if (raid_conf->failed_disks) { | |
11469 | + } | |
11470 | + if (conf->failed_disks) { | |
11471 | for (i = 0; i < disks; i++) { | |
11472 | - if (raid_conf->disks[i].operational) | |
11473 | + if (conf->disks[i].operational) | |
11474 | continue; | |
11475 | if (bh != sh->bh_old[i] && bh != sh->bh_req[i] && bh != sh->bh_copy[i]) | |
11476 | continue; | |
11477 | - if (bh->b_rdev != raid_conf->disks[i].dev) | |
11478 | + if (bh->b_rdev != conf->disks[i].dev) | |
11479 | continue; | |
11480 | set_bit(STRIPE_ERROR, &sh->state); | |
11481 | } | |
11482 | } | |
11483 | - restore_flags(flags); | |
11484 | + md_spin_unlock_irqrestore(&sh->stripe_lock, flags); | |
11485 | } | |
11486 | ||
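The end-request path in this hunk trades the global save_flags()/cli() pair for a per-stripe spinlock plus an atomic pending-I/O counter, so the "last completion wakes the daemon" test no longer requires interrupts to be disabled everywhere. A rough user-space analogue of that shape, using C11 atomics and a pthread spinlock (the struct and helper names are made up for illustration):

#include <pthread.h>
#include <stdatomic.h>

struct stripe {
	pthread_spinlock_t lock;   /* protects per-stripe flags            */
	atomic_int nr_pending;     /* outstanding requests on this stripe  */
	int error;
};

/* Completion handler: record status under the per-stripe lock, then use an
 * atomic decrement to detect the last completion without a global cli(). */
void end_one_request(struct stripe *s, int uptodate)
{
	pthread_spin_lock(&s->lock);
	if (!uptodate)
		s->error = 1;
	/* atomic_fetch_sub() returns the previous value: 1 means this was
	 * the last pending request, so the service thread can be woken. */
	if (atomic_fetch_sub(&s->nr_pending, 1) == 1) {
		/* wake the service thread here */
	}
	pthread_spin_unlock(&s->lock);
}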
11487 | -static int raid5_map (struct md_dev *mddev, kdev_t *rdev, | |
11488 | +static int raid5_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
11489 | unsigned long *rsector, unsigned long size) | |
11490 | { | |
11491 | /* No complex mapping used: the core of the work is done in the | |
11492 | @@ -577,11 +582,10 @@ | |
11493 | ||
11494 | static void raid5_build_block (struct stripe_head *sh, struct buffer_head *bh, int i) | |
11495 | { | |
11496 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11497 | - struct md_dev *mddev = raid_conf->mddev; | |
11498 | - int minor = (int) (mddev - md_dev); | |
11499 | + raid5_conf_t *conf = sh->raid_conf; | |
11500 | + mddev_t *mddev = conf->mddev; | |
11501 | char *b_data; | |
11502 | - kdev_t dev = MKDEV(MD_MAJOR, minor); | |
11503 | + kdev_t dev = mddev_to_kdev(mddev); | |
11504 | int block = sh->sector / (sh->size >> 9); | |
11505 | ||
11506 | b_data = ((volatile struct buffer_head *) bh)->b_data; | |
11507 | @@ -589,7 +593,7 @@ | |
11508 | init_buffer(bh, dev, block, raid5_end_request, sh); | |
11509 | ((volatile struct buffer_head *) bh)->b_data = b_data; | |
11510 | ||
11511 | - bh->b_rdev = raid_conf->disks[i].dev; | |
11512 | + bh->b_rdev = conf->disks[i].dev; | |
11513 | bh->b_rsector = sh->sector; | |
11514 | ||
11515 | bh->b_state = (1 << BH_Req); | |
11516 | @@ -597,33 +601,62 @@ | |
11517 | bh->b_list = BUF_LOCKED; | |
11518 | } | |
11519 | ||
11520 | -static int raid5_error (struct md_dev *mddev, kdev_t dev) | |
11521 | +static int raid5_error (mddev_t *mddev, kdev_t dev) | |
11522 | { | |
11523 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
11524 | - md_superblock_t *sb = mddev->sb; | |
11525 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
11526 | + mdp_super_t *sb = mddev->sb; | |
11527 | struct disk_info *disk; | |
11528 | int i; | |
11529 | ||
11530 | PRINTK(("raid5_error called\n")); | |
11531 | - raid_conf->resync_parity = 0; | |
11532 | - for (i = 0, disk = raid_conf->disks; i < raid_conf->raid_disks; i++, disk++) | |
11533 | + conf->resync_parity = 0; | |
11534 | + for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) { | |
11535 | if (disk->dev == dev && disk->operational) { | |
11536 | disk->operational = 0; | |
11537 | - sb->disks[disk->number].state |= (1 << MD_FAULTY_DEVICE); | |
11538 | - sb->disks[disk->number].state &= ~(1 << MD_SYNC_DEVICE); | |
11539 | - sb->disks[disk->number].state &= ~(1 << MD_ACTIVE_DEVICE); | |
11540 | + mark_disk_faulty(sb->disks+disk->number); | |
11541 | + mark_disk_nonsync(sb->disks+disk->number); | |
11542 | + mark_disk_inactive(sb->disks+disk->number); | |
11543 | sb->active_disks--; | |
11544 | sb->working_disks--; | |
11545 | sb->failed_disks++; | |
11546 | mddev->sb_dirty = 1; | |
11547 | - raid_conf->working_disks--; | |
11548 | - raid_conf->failed_disks++; | |
11549 | - md_wakeup_thread(raid_conf->thread); | |
11550 | + conf->working_disks--; | |
11551 | + conf->failed_disks++; | |
11552 | + md_wakeup_thread(conf->thread); | |
11553 | printk (KERN_ALERT | |
11554 | - "RAID5: Disk failure on %s, disabling device." | |
11555 | - "Operation continuing on %d devices\n", | |
11556 | - kdevname (dev), raid_conf->working_disks); | |
11557 | + "raid5: Disk failure on %s, disabling device." | |
11558 | + " Operation continuing on %d devices\n", | |
11559 | + partition_name (dev), conf->working_disks); | |
11560 | + return -EIO; | |
11561 | } | |
11562 | + } | |
11563 | + /* | |
11564 | + * handle errors in spares (during reconstruction) | |
11565 | + */ | |
11566 | + if (conf->spare) { | |
11567 | + disk = conf->spare; | |
11568 | + if (disk->dev == dev) { | |
11569 | + printk (KERN_ALERT | |
11570 | + "raid5: Disk failure on spare %s\n", | |
11571 | + partition_name (dev)); | |
11572 | + if (!conf->spare->operational) { | |
11573 | + MD_BUG(); | |
11574 | + return -EIO; | |
11575 | + } | |
11576 | + disk->operational = 0; | |
11577 | + disk->write_only = 0; | |
11578 | + conf->spare = NULL; | |
11579 | + mark_disk_faulty(sb->disks+disk->number); | |
11580 | + mark_disk_nonsync(sb->disks+disk->number); | |
11581 | + mark_disk_inactive(sb->disks+disk->number); | |
11582 | + sb->spare_disks--; | |
11583 | + sb->working_disks--; | |
11584 | + sb->failed_disks++; | |
11585 | + | |
11586 | + return -EIO; | |
11587 | + } | |
11588 | + } | |
11589 | + MD_BUG(); | |
11590 | return 0; | |
11591 | } | |
11592 | ||
11593 | @@ -634,12 +667,12 @@ | |
11594 | static inline unsigned long | |
11595 | raid5_compute_sector (int r_sector, unsigned int raid_disks, unsigned int data_disks, | |
11596 | unsigned int * dd_idx, unsigned int * pd_idx, | |
11597 | - struct raid5_data *raid_conf) | |
11598 | + raid5_conf_t *conf) | |
11599 | { | |
11600 | unsigned int stripe; | |
11601 | int chunk_number, chunk_offset; | |
11602 | unsigned long new_sector; | |
11603 | - int sectors_per_chunk = raid_conf->chunk_size >> 9; | |
11604 | + int sectors_per_chunk = conf->chunk_size >> 9; | |
11605 | ||
11606 | /* First compute the information on this sector */ | |
11607 | ||
11608 | @@ -662,9 +695,9 @@ | |
11609 | /* | |
11610 | * Select the parity disk based on the user selected algorithm. | |
11611 | */ | |
11612 | - if (raid_conf->level == 4) | |
11613 | + if (conf->level == 4) | |
11614 | *pd_idx = data_disks; | |
11615 | - else switch (raid_conf->algorithm) { | |
11616 | + else switch (conf->algorithm) { | |
11617 | case ALGORITHM_LEFT_ASYMMETRIC: | |
11618 | *pd_idx = data_disks - stripe % raid_disks; | |
11619 | if (*dd_idx >= *pd_idx) | |
11620 | @@ -684,7 +717,7 @@ | |
11621 | *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks; | |
11622 | break; | |
11623 | default: | |
11624 | - printk ("raid5: unsupported algorithm %d\n", raid_conf->algorithm); | |
11625 | + printk ("raid5: unsupported algorithm %d\n", conf->algorithm); | |
11626 | } | |
11627 | ||
11628 | /* | |
11629 | @@ -705,16 +738,16 @@ | |
11630 | ||
11631 | static unsigned long compute_blocknr(struct stripe_head *sh, int i) | |
11632 | { | |
11633 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11634 | - int raid_disks = raid_conf->raid_disks, data_disks = raid_disks - 1; | |
11635 | + raid5_conf_t *conf = sh->raid_conf; | |
11636 | + int raid_disks = conf->raid_disks, data_disks = raid_disks - 1; | |
11637 | unsigned long new_sector = sh->sector, check; | |
11638 | - int sectors_per_chunk = raid_conf->chunk_size >> 9; | |
11639 | + int sectors_per_chunk = conf->chunk_size >> 9; | |
11640 | unsigned long stripe = new_sector / sectors_per_chunk; | |
11641 | int chunk_offset = new_sector % sectors_per_chunk; | |
11642 | int chunk_number, dummy1, dummy2, dd_idx = i; | |
11643 | unsigned long r_sector, blocknr; | |
11644 | ||
11645 | - switch (raid_conf->algorithm) { | |
11646 | + switch (conf->algorithm) { | |
11647 | case ALGORITHM_LEFT_ASYMMETRIC: | |
11648 | case ALGORITHM_RIGHT_ASYMMETRIC: | |
11649 | if (i > sh->pd_idx) | |
11650 | @@ -727,14 +760,14 @@ | |
11651 | i -= (sh->pd_idx + 1); | |
11652 | break; | |
11653 | default: | |
11654 | - printk ("raid5: unsupported algorithm %d\n", raid_conf->algorithm); | |
11655 | + printk ("raid5: unsupported algorithm %d\n", conf->algorithm); | |
11656 | } | |
11657 | ||
11658 | chunk_number = stripe * data_disks + i; | |
11659 | r_sector = chunk_number * sectors_per_chunk + chunk_offset; | |
11660 | blocknr = r_sector / (sh->size >> 9); | |
11661 | ||
11662 | - check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, raid_conf); | |
11663 | + check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); | |
11664 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { | |
11665 | printk("compute_blocknr: map not correct\n"); | |
11666 | return 0; | |
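For the left-asymmetric layout visible in the two hunks above, a small worked example makes the placement concrete: with raid_disks = 4 the parity disk walks backwards one slot per stripe, and data indices at or beyond it are shifted up by one. The numbers below are purely illustrative:

#include <stdio.h>

int main(void)
{
	unsigned int raid_disks = 4, data_disks = raid_disks - 1;
	unsigned int stripe, dd;

	for (stripe = 0; stripe < 4; stripe++) {
		unsigned int pd = data_disks - stripe % raid_disks;   /* parity disk */
		for (dd = 0; dd < data_disks; dd++) {
			unsigned int phys = (dd >= pd) ? dd + 1 : dd; /* skip parity slot */
			printf("stripe %u: data %u -> disk %u, parity on disk %u\n",
			       stripe, dd, phys, pd);
		}
	}
	return 0;
}

Stripe 0 puts parity on disk 3, stripe 1 on disk 2, and so on; compute_blocknr() above performs the inverse of this mapping to recover the logical block from a physical one, which is why it cross-checks itself against raid5_compute_sector().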
11667 | @@ -742,36 +775,11 @@ | |
11668 | return blocknr; | |
11669 | } | |
11670 | ||
11671 | -#ifdef HAVE_ARCH_XORBLOCK | |
11672 | -static void xor_block(struct buffer_head *dest, struct buffer_head *source) | |
11673 | -{ | |
11674 | - __xor_block((char *) dest->b_data, (char *) source->b_data, dest->b_size); | |
11675 | -} | |
11676 | -#else | |
11677 | -static void xor_block(struct buffer_head *dest, struct buffer_head *source) | |
11678 | -{ | |
11679 | - long lines = dest->b_size / (sizeof (long)) / 8, i; | |
11680 | - long *destp = (long *) dest->b_data, *sourcep = (long *) source->b_data; | |
11681 | - | |
11682 | - for (i = lines; i > 0; i--) { | |
11683 | - *(destp + 0) ^= *(sourcep + 0); | |
11684 | - *(destp + 1) ^= *(sourcep + 1); | |
11685 | - *(destp + 2) ^= *(sourcep + 2); | |
11686 | - *(destp + 3) ^= *(sourcep + 3); | |
11687 | - *(destp + 4) ^= *(sourcep + 4); | |
11688 | - *(destp + 5) ^= *(sourcep + 5); | |
11689 | - *(destp + 6) ^= *(sourcep + 6); | |
11690 | - *(destp + 7) ^= *(sourcep + 7); | |
11691 | - destp += 8; | |
11692 | - sourcep += 8; | |
11693 | - } | |
11694 | -} | |
11695 | -#endif | |
11696 | - | |
11697 | static void compute_block(struct stripe_head *sh, int dd_idx) | |
11698 | { | |
11699 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11700 | - int i, disks = raid_conf->raid_disks; | |
11701 | + raid5_conf_t *conf = sh->raid_conf; | |
11702 | + int i, count, disks = conf->raid_disks; | |
11703 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
11704 | ||
11705 | PRINTK(("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx)); | |
11706 | ||
11707 | @@ -780,69 +788,100 @@ | |
11708 | raid5_build_block(sh, sh->bh_old[dd_idx], dd_idx); | |
11709 | ||
11710 | memset(sh->bh_old[dd_idx]->b_data, 0, sh->size); | |
11711 | + bh_ptr[0] = sh->bh_old[dd_idx]; | |
11712 | + count = 1; | |
11713 | for (i = 0; i < disks; i++) { | |
11714 | if (i == dd_idx) | |
11715 | continue; | |
11716 | if (sh->bh_old[i]) { | |
11717 | - xor_block(sh->bh_old[dd_idx], sh->bh_old[i]); | |
11718 | - continue; | |
11719 | - } else | |
11720 | + bh_ptr[count++] = sh->bh_old[i]; | |
11721 | + } else { | |
11722 | printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i); | |
11723 | + } | |
11724 | + if (count == MAX_XOR_BLOCKS) { | |
11725 | + xor_block(count, &bh_ptr[0]); | |
11726 | + count = 1; | |
11727 | + } | |
11728 | + } | |
11729 | + if (count != 1) { | |
11730 | + xor_block(count, &bh_ptr[0]); | |
11731 | } | |
11732 | raid5_mark_buffer_uptodate(sh->bh_old[dd_idx], 1); | |
11733 | } | |
11734 | ||
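The rewritten compute_block() no longer XORs one source buffer at a time; it collects pointers into bh_ptr[] and flushes them to xor_block() in batches, with the destination always kept in slot 0. A user-space sketch of that accumulation idiom (the batch size and all names are illustrative):

#include <stddef.h>

#define MAX_XOR_BLOCKS 5   /* illustrative batch size */

/* xor_into(): fold one source into the destination, word by word. */
static void xor_into(unsigned long *dst, const unsigned long *src, size_t words)
{
	size_t i;
	for (i = 0; i < words; i++)
		dst[i] ^= src[i];
}

/* Accumulate sources and flush in batches, keeping the destination as
 * element 0 of each batch -- the same shape as the bh_ptr[] loop above. */
void xor_many(unsigned long *dst, unsigned long **src, int nr, size_t words)
{
	int i, j, count = 1;
	unsigned long *batch[MAX_XOR_BLOCKS];

	batch[0] = dst;
	for (i = 0; i < nr; i++) {
		batch[count++] = src[i];
		if (count == MAX_XOR_BLOCKS) {
			for (j = 1; j < count; j++)
				xor_into(batch[0], batch[j], words);
			count = 1;              /* destination stays in slot 0 */
		}
	}
	for (i = 1; i < count; i++)             /* flush the partial batch */
		xor_into(batch[0], batch[i], words);
}

Batching lets an architecture-specific xor_block() amortize loads of the destination across several sources instead of re-reading it for every pair.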
11735 | static void compute_parity(struct stripe_head *sh, int method) | |
11736 | { | |
11737 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11738 | - int i, pd_idx = sh->pd_idx, disks = raid_conf->raid_disks; | |
11739 | + raid5_conf_t *conf = sh->raid_conf; | |
11740 | + int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, lowprio, count; | |
11741 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
11742 | ||
11743 | PRINTK(("compute_parity, stripe %lu, method %d\n", sh->sector, method)); | |
11744 | + lowprio = 1; | |
11745 | for (i = 0; i < disks; i++) { | |
11746 | if (i == pd_idx || !sh->bh_new[i]) | |
11747 | continue; | |
11748 | if (!sh->bh_copy[i]) | |
11749 | sh->bh_copy[i] = raid5_kmalloc_buffer(sh, sh->size); | |
11750 | raid5_build_block(sh, sh->bh_copy[i], i); | |
11751 | + if (!buffer_lowprio(sh->bh_new[i])) | |
11752 | + lowprio = 0; | |
11753 | + else | |
11754 | + mark_buffer_lowprio(sh->bh_copy[i]); | |
11755 | mark_buffer_clean(sh->bh_new[i]); | |
11756 | memcpy(sh->bh_copy[i]->b_data, sh->bh_new[i]->b_data, sh->size); | |
11757 | } | |
11758 | if (sh->bh_copy[pd_idx] == NULL) | |
11759 | sh->bh_copy[pd_idx] = raid5_kmalloc_buffer(sh, sh->size); | |
11760 | raid5_build_block(sh, sh->bh_copy[pd_idx], sh->pd_idx); | |
11761 | + if (lowprio) | |
11762 | + mark_buffer_lowprio(sh->bh_copy[pd_idx]); | |
11763 | ||
11764 | if (method == RECONSTRUCT_WRITE) { | |
11765 | memset(sh->bh_copy[pd_idx]->b_data, 0, sh->size); | |
11766 | + bh_ptr[0] = sh->bh_copy[pd_idx]; | |
11767 | + count = 1; | |
11768 | for (i = 0; i < disks; i++) { | |
11769 | if (i == sh->pd_idx) | |
11770 | continue; | |
11771 | if (sh->bh_new[i]) { | |
11772 | - xor_block(sh->bh_copy[pd_idx], sh->bh_copy[i]); | |
11773 | - continue; | |
11774 | + bh_ptr[count++] = sh->bh_copy[i]; | |
11775 | + } else if (sh->bh_old[i]) { | |
11776 | + bh_ptr[count++] = sh->bh_old[i]; | |
11777 | } | |
11778 | - if (sh->bh_old[i]) { | |
11779 | - xor_block(sh->bh_copy[pd_idx], sh->bh_old[i]); | |
11780 | - continue; | |
11781 | + if (count == MAX_XOR_BLOCKS) { | |
11782 | + xor_block(count, &bh_ptr[0]); | |
11783 | + count = 1; | |
11784 | } | |
11785 | } | |
11786 | + if (count != 1) { | |
11787 | + xor_block(count, &bh_ptr[0]); | |
11788 | + } | |
11789 | } else if (method == READ_MODIFY_WRITE) { | |
11790 | memcpy(sh->bh_copy[pd_idx]->b_data, sh->bh_old[pd_idx]->b_data, sh->size); | |
11791 | + bh_ptr[0] = sh->bh_copy[pd_idx]; | |
11792 | + count = 1; | |
11793 | for (i = 0; i < disks; i++) { | |
11794 | if (i == sh->pd_idx) | |
11795 | continue; | |
11796 | if (sh->bh_new[i] && sh->bh_old[i]) { | |
11797 | - xor_block(sh->bh_copy[pd_idx], sh->bh_copy[i]); | |
11798 | - xor_block(sh->bh_copy[pd_idx], sh->bh_old[i]); | |
11799 | - continue; | |
11800 | + bh_ptr[count++] = sh->bh_copy[i]; | |
11801 | + bh_ptr[count++] = sh->bh_old[i]; | |
11802 | + } | |
11803 | + if (count >= (MAX_XOR_BLOCKS - 1)) { | |
11804 | + xor_block(count, &bh_ptr[0]); | |
11805 | + count = 1; | |
11806 | } | |
11807 | } | |
11808 | + if (count != 1) { | |
11809 | + xor_block(count, &bh_ptr[0]); | |
11810 | + } | |
11811 | } | |
11812 | raid5_mark_buffer_uptodate(sh->bh_copy[pd_idx], 1); | |
11813 | } | |
11814 | ||
11815 | static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw) | |
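compute_parity() above produces the same parity block by either of two routes: RECONSTRUCT_WRITE recomputes it from every data block, while READ_MODIFY_WRITE folds the old data and old parity into the new data. A tiny self-checking example of the XOR identity behind that equivalence (toy one-word blocks, made-up values):

#include <assert.h>

int main(void)
{
	/* Three data blocks of one word each, plus their parity. */
	unsigned d0 = 0x11, d1 = 0x22, d2 = 0x33;
	unsigned parity = d0 ^ d1 ^ d2;

	unsigned new_d1 = 0x55;                 /* block being rewritten */

	/* RECONSTRUCT_WRITE: parity = XOR of all (new) data blocks. */
	unsigned rcw = d0 ^ new_d1 ^ d2;

	/* READ_MODIFY_WRITE: parity = old parity ^ old data ^ new data. */
	unsigned rmw = parity ^ d1 ^ new_d1;

	assert(rcw == rmw);                     /* both methods agree */
	return 0;
}

The method1/method2 bookkeeping in handle_stripe() below chooses whichever of the two needs fewer reads of old blocks.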
11816 | { | |
11817 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11818 | + raid5_conf_t *conf = sh->raid_conf; | |
11819 | struct buffer_head *bh_req; | |
11820 | ||
11821 | if (sh->bh_new[dd_idx]) { | |
11822 | @@ -860,19 +899,22 @@ | |
11823 | if (sh->phase == PHASE_COMPLETE && sh->cmd == STRIPE_NONE) { | |
11824 | sh->phase = PHASE_BEGIN; | |
11825 | sh->cmd = (rw == READ) ? STRIPE_READ : STRIPE_WRITE; | |
11826 | - raid_conf->nr_pending_stripes++; | |
11827 | - atomic_inc(&raid_conf->nr_handle); | |
11828 | + conf->nr_pending_stripes++; | |
11829 | + atomic_inc(&conf->nr_handle); | |
11830 | } | |
11831 | sh->bh_new[dd_idx] = bh; | |
11832 | sh->bh_req[dd_idx] = bh_req; | |
11833 | sh->cmd_new[dd_idx] = rw; | |
11834 | sh->new[dd_idx] = 1; | |
11835 | + | |
11836 | + if (buffer_lowprio(bh)) | |
11837 | + mark_buffer_lowprio(bh_req); | |
11838 | } | |
11839 | ||
11840 | static void complete_stripe(struct stripe_head *sh) | |
11841 | { | |
11842 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11843 | - int disks = raid_conf->raid_disks; | |
11844 | + raid5_conf_t *conf = sh->raid_conf; | |
11845 | + int disks = conf->raid_disks; | |
11846 | int i, new = 0; | |
11847 | ||
11848 | PRINTK(("complete_stripe %lu\n", sh->sector)); | |
11849 | @@ -909,6 +951,22 @@ | |
11850 | } | |
11851 | } | |
11852 | ||
11853 | + | |
11854 | +static int is_stripe_lowprio(struct stripe_head *sh, int disks) | |
11855 | +{ | |
11856 | + int i, lowprio = 1; | |
11857 | + | |
11858 | + for (i = 0; i < disks; i++) { | |
11859 | + if (sh->bh_new[i]) | |
11860 | + if (!buffer_lowprio(sh->bh_new[i])) | |
11861 | + lowprio = 0; | |
11862 | + if (sh->bh_old[i]) | |
11863 | + if (!buffer_lowprio(sh->bh_old[i])) | |
11864 | + lowprio = 0; | |
11865 | + } | |
11866 | + return lowprio; | |
11867 | +} | |
11868 | + | |
11869 | /* | |
11870 | * handle_stripe() is our main logic routine. Note that: | |
11871 | * | |
11872 | @@ -919,28 +977,27 @@ | |
11873 | * 2. We should be careful to set sh->nr_pending whenever we sleep, | |
11874 | * to prevent re-entry of handle_stripe() for the same sh. | |
11875 | * | |
11876 | - * 3. raid_conf->failed_disks and disk->operational can be changed | |
11877 | + * 3. conf->failed_disks and disk->operational can be changed | |
11878 | * from an interrupt. This complicates things a bit, but it allows | |
11879 | * us to stop issuing requests for a failed drive as soon as possible. | |
11880 | */ | |
11881 | static void handle_stripe(struct stripe_head *sh) | |
11882 | { | |
11883 | - struct raid5_data *raid_conf = sh->raid_conf; | |
11884 | - struct md_dev *mddev = raid_conf->mddev; | |
11885 | - int minor = (int) (mddev - md_dev); | |
11886 | + raid5_conf_t *conf = sh->raid_conf; | |
11887 | + mddev_t *mddev = conf->mddev; | |
11888 | struct buffer_head *bh; | |
11889 | - int disks = raid_conf->raid_disks; | |
11890 | - int i, nr = 0, nr_read = 0, nr_write = 0; | |
11891 | + int disks = conf->raid_disks; | |
11892 | + int i, nr = 0, nr_read = 0, nr_write = 0, lowprio; | |
11893 | int nr_cache = 0, nr_cache_other = 0, nr_cache_overwrite = 0, parity = 0; | |
11894 | int nr_failed_other = 0, nr_failed_overwrite = 0, parity_failed = 0; | |
11895 | int reading = 0, nr_writing = 0; | |
11896 | int method1 = INT_MAX, method2 = INT_MAX; | |
11897 | int block; | |
11898 | unsigned long flags; | |
11899 | - int operational[MD_SB_DISKS], failed_disks = raid_conf->failed_disks; | |
11900 | + int operational[MD_SB_DISKS], failed_disks = conf->failed_disks; | |
11901 | ||
11902 | PRINTK(("handle_stripe(), stripe %lu\n", sh->sector)); | |
11903 | - if (sh->nr_pending) { | |
11904 | + if (md_atomic_read(&sh->nr_pending)) { | |
11905 | printk("handle_stripe(), stripe %lu, io still pending\n", sh->sector); | |
11906 | return; | |
11907 | } | |
11908 | @@ -949,9 +1006,9 @@ | |
11909 | return; | |
11910 | } | |
11911 | ||
11912 | - atomic_dec(&raid_conf->nr_handle); | |
11913 | + atomic_dec(&conf->nr_handle); | |
11914 | ||
11915 | - if (test_and_clear_bit(STRIPE_ERROR, &sh->state)) { | |
11916 | + if (md_test_and_clear_bit(STRIPE_ERROR, &sh->state)) { | |
11917 | printk("raid5: restarting stripe %lu\n", sh->sector); | |
11918 | sh->phase = PHASE_BEGIN; | |
11919 | } | |
11920 | @@ -969,11 +1026,11 @@ | |
11921 | save_flags(flags); | |
11922 | cli(); | |
11923 | for (i = 0; i < disks; i++) { | |
11924 | - operational[i] = raid_conf->disks[i].operational; | |
11925 | - if (i == sh->pd_idx && raid_conf->resync_parity) | |
11926 | + operational[i] = conf->disks[i].operational; | |
11927 | + if (i == sh->pd_idx && conf->resync_parity) | |
11928 | operational[i] = 0; | |
11929 | } | |
11930 | - failed_disks = raid_conf->failed_disks; | |
11931 | + failed_disks = conf->failed_disks; | |
11932 | restore_flags(flags); | |
11933 | ||
11934 | if (failed_disks > 1) { | |
11935 | @@ -1017,7 +1074,7 @@ | |
11936 | } | |
11937 | ||
11938 | if (nr_write && nr_read) | |
11939 | - printk("raid5: bug, nr_write == %d, nr_read == %d, sh->cmd == %d\n", nr_write, nr_read, sh->cmd); | |
11940 | + printk("raid5: bug, nr_write == %d, nr_read == %d, sh->cmd == %d\n", nr_write, nr_read, sh->cmd); | |
11941 | ||
11942 | if (nr_write) { | |
11943 | /* | |
11944 | @@ -1030,7 +1087,7 @@ | |
11945 | if (sh->bh_new[i]) | |
11946 | continue; | |
11947 | block = (int) compute_blocknr(sh, i); | |
11948 | - bh = find_buffer(MKDEV(MD_MAJOR, minor), block, sh->size); | |
11949 | + bh = find_buffer(mddev_to_kdev(mddev), block, sh->size); | |
11950 | if (bh && bh->b_count == 0 && buffer_dirty(bh) && !buffer_locked(bh)) { | |
11951 | PRINTK(("Whee.. sector %lu, index %d (%d) found in the buffer cache!\n", sh->sector, i, block)); | |
11952 | add_stripe_bh(sh, bh, i, WRITE); | |
11953 | @@ -1064,21 +1121,22 @@ | |
11954 | ||
11955 | if (!method1 || !method2) { | |
11956 | lock_stripe(sh); | |
11957 | - sh->nr_pending++; | |
11958 | + lowprio = is_stripe_lowprio(sh, disks); | |
11959 | + atomic_inc(&sh->nr_pending); | |
11960 | sh->phase = PHASE_WRITE; | |
11961 | compute_parity(sh, method1 <= method2 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); | |
11962 | for (i = 0; i < disks; i++) { | |
11963 | - if (!operational[i] && !raid_conf->spare && !raid_conf->resync_parity) | |
11964 | + if (!operational[i] && !conf->spare && !conf->resync_parity) | |
11965 | continue; | |
11966 | if (i == sh->pd_idx || sh->bh_new[i]) | |
11967 | nr_writing++; | |
11968 | } | |
11969 | ||
11970 | - sh->nr_pending = nr_writing; | |
11971 | - PRINTK(("handle_stripe() %lu, writing back %d\n", sh->sector, sh->nr_pending)); | |
11972 | + md_atomic_set(&sh->nr_pending, nr_writing); | |
11973 | + PRINTK(("handle_stripe() %lu, writing back %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
11974 | ||
11975 | for (i = 0; i < disks; i++) { | |
11976 | - if (!operational[i] && !raid_conf->spare && !raid_conf->resync_parity) | |
11977 | + if (!operational[i] && !conf->spare && !conf->resync_parity) | |
11978 | continue; | |
11979 | bh = sh->bh_copy[i]; | |
11980 | if (i != sh->pd_idx && ((bh == NULL) ^ (sh->bh_new[i] == NULL))) | |
11981 | @@ -1089,18 +1147,30 @@ | |
11982 | bh->b_state |= (1<<BH_Dirty); | |
11983 | PRINTK(("making request for buffer %d\n", i)); | |
11984 | clear_bit(BH_Lock, &bh->b_state); | |
11985 | - if (!operational[i] && !raid_conf->resync_parity) { | |
11986 | - bh->b_rdev = raid_conf->spare->dev; | |
11987 | - make_request(MAJOR(raid_conf->spare->dev), WRITE, bh); | |
11988 | - } else | |
11989 | - make_request(MAJOR(raid_conf->disks[i].dev), WRITE, bh); | |
11990 | + if (!operational[i] && !conf->resync_parity) { | |
11991 | + bh->b_rdev = conf->spare->dev; | |
11992 | + make_request(MAJOR(conf->spare->dev), WRITE, bh); | |
11993 | + } else { | |
11994 | +#if 0 | |
11995 | + make_request(MAJOR(conf->disks[i].dev), WRITE, bh); | |
11996 | +#else | |
11997 | + if (!lowprio || (i==sh->pd_idx)) | |
11998 | + make_request(MAJOR(conf->disks[i].dev), WRITE, bh); | |
11999 | + else { | |
12000 | + mark_buffer_clean(bh); | |
12001 | + raid5_end_request(bh,1); | |
12002 | + sh->new[i] = 0; | |
12003 | + } | |
12004 | +#endif | |
12005 | + } | |
12006 | } | |
12007 | } | |
12008 | return; | |
12009 | } | |
12010 | ||
12011 | lock_stripe(sh); | |
12012 | - sh->nr_pending++; | |
12013 | + lowprio = is_stripe_lowprio(sh, disks); | |
12014 | + atomic_inc(&sh->nr_pending); | |
12015 | if (method1 < method2) { | |
12016 | sh->write_method = RECONSTRUCT_WRITE; | |
12017 | for (i = 0; i < disks; i++) { | |
12018 | @@ -1110,6 +1180,8 @@ | |
12019 | continue; | |
12020 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
12021 | raid5_build_block(sh, sh->bh_old[i], i); | |
12022 | + if (lowprio) | |
12023 | + mark_buffer_lowprio(sh->bh_old[i]); | |
12024 | reading++; | |
12025 | } | |
12026 | } else { | |
12027 | @@ -1121,19 +1193,21 @@ | |
12028 | continue; | |
12029 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
12030 | raid5_build_block(sh, sh->bh_old[i], i); | |
12031 | + if (lowprio) | |
12032 | + mark_buffer_lowprio(sh->bh_old[i]); | |
12033 | reading++; | |
12034 | } | |
12035 | } | |
12036 | sh->phase = PHASE_READ_OLD; | |
12037 | - sh->nr_pending = reading; | |
12038 | - PRINTK(("handle_stripe() %lu, reading %d old buffers\n", sh->sector, sh->nr_pending)); | |
12039 | + md_atomic_set(&sh->nr_pending, reading); | |
12040 | + PRINTK(("handle_stripe() %lu, reading %d old buffers\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
12041 | for (i = 0; i < disks; i++) { | |
12042 | if (!sh->bh_old[i]) | |
12043 | continue; | |
12044 | if (buffer_uptodate(sh->bh_old[i])) | |
12045 | continue; | |
12046 | clear_bit(BH_Lock, &sh->bh_old[i]->b_state); | |
12047 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_old[i]); | |
12048 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_old[i]); | |
12049 | } | |
12050 | } else { | |
12051 | /* | |
12052 | @@ -1141,7 +1215,8 @@ | |
12053 | */ | |
12054 | method1 = nr_read - nr_cache_overwrite; | |
12055 | lock_stripe(sh); | |
12056 | - sh->nr_pending++; | |
12057 | + lowprio = is_stripe_lowprio(sh, disks); | |
12058 | + atomic_inc(&sh->nr_pending); | |
12059 | ||
12060 | PRINTK(("handle_stripe(), sector %lu, nr_read %d, nr_cache %d, method1 %d\n", sh->sector, nr_read, nr_cache, method1)); | |
12061 | if (!method1 || (method1 == 1 && nr_cache == disks - 1)) { | |
12062 | @@ -1149,18 +1224,22 @@ | |
12063 | for (i = 0; i < disks; i++) { | |
12064 | if (!sh->bh_new[i]) | |
12065 | continue; | |
12066 | - if (!sh->bh_old[i]) | |
12067 | + if (!sh->bh_old[i]) { | |
12068 | compute_block(sh, i); | |
12069 | + if (lowprio) | |
12070 | + mark_buffer_lowprio | |
12071 | + (sh->bh_old[i]); | |
12072 | + } | |
12073 | memcpy(sh->bh_new[i]->b_data, sh->bh_old[i]->b_data, sh->size); | |
12074 | } | |
12075 | - sh->nr_pending--; | |
12076 | + atomic_dec(&sh->nr_pending); | |
12077 | complete_stripe(sh); | |
12078 | return; | |
12079 | } | |
12080 | if (nr_failed_overwrite) { | |
12081 | sh->phase = PHASE_READ_OLD; | |
12082 | - sh->nr_pending = (disks - 1) - nr_cache; | |
12083 | - PRINTK(("handle_stripe() %lu, phase READ_OLD, pending %d\n", sh->sector, sh->nr_pending)); | |
12084 | + md_atomic_set(&sh->nr_pending, (disks - 1) - nr_cache); | |
12085 | + PRINTK(("handle_stripe() %lu, phase READ_OLD, pending %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
12086 | for (i = 0; i < disks; i++) { | |
12087 | if (sh->bh_old[i]) | |
12088 | continue; | |
12089 | @@ -1168,13 +1247,16 @@ | |
12090 | continue; | |
12091 | sh->bh_old[i] = raid5_kmalloc_buffer(sh, sh->size); | |
12092 | raid5_build_block(sh, sh->bh_old[i], i); | |
12093 | + if (lowprio) | |
12094 | + mark_buffer_lowprio(sh->bh_old[i]); | |
12095 | clear_bit(BH_Lock, &sh->bh_old[i]->b_state); | |
12096 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_old[i]); | |
12097 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_old[i]); | |
12098 | } | |
12099 | } else { | |
12100 | sh->phase = PHASE_READ; | |
12101 | - sh->nr_pending = nr_read - nr_cache_overwrite; | |
12102 | - PRINTK(("handle_stripe() %lu, phase READ, pending %d\n", sh->sector, sh->nr_pending)); | |
12103 | + md_atomic_set(&sh->nr_pending, | |
12104 | + nr_read - nr_cache_overwrite); | |
12105 | + PRINTK(("handle_stripe() %lu, phase READ, pending %d\n", sh->sector, md_atomic_read(&sh->nr_pending))); | |
12106 | for (i = 0; i < disks; i++) { | |
12107 | if (!sh->bh_new[i]) | |
12108 | continue; | |
12109 | @@ -1182,16 +1264,16 @@ | |
12110 | memcpy(sh->bh_new[i]->b_data, sh->bh_old[i]->b_data, sh->size); | |
12111 | continue; | |
12112 | } | |
12113 | - make_request(MAJOR(raid_conf->disks[i].dev), READ, sh->bh_req[i]); | |
12114 | + make_request(MAJOR(conf->disks[i].dev), READ, sh->bh_req[i]); | |
12115 | } | |
12116 | } | |
12117 | } | |
12118 | } | |
12119 | ||
12120 | -static int raid5_make_request (struct md_dev *mddev, int rw, struct buffer_head * bh) | |
12121 | +static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh) | |
12122 | { | |
12123 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
12124 | - const unsigned int raid_disks = raid_conf->raid_disks; | |
12125 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
12126 | + const unsigned int raid_disks = conf->raid_disks; | |
12127 | const unsigned int data_disks = raid_disks - 1; | |
12128 | unsigned int dd_idx, pd_idx; | |
12129 | unsigned long new_sector; | |
12130 | @@ -1202,15 +1284,15 @@ | |
12131 | if (rw == WRITEA) rw = WRITE; | |
12132 | ||
12133 | new_sector = raid5_compute_sector(bh->b_rsector, raid_disks, data_disks, | |
12134 | - &dd_idx, &pd_idx, raid_conf); | |
12135 | + &dd_idx, &pd_idx, conf); | |
12136 | ||
12137 | PRINTK(("raid5_make_request, sector %lu\n", new_sector)); | |
12138 | repeat: | |
12139 | - sh = get_stripe(raid_conf, new_sector, bh->b_size); | |
12140 | + sh = get_stripe(conf, new_sector, bh->b_size); | |
12141 | if ((rw == READ && sh->cmd == STRIPE_WRITE) || (rw == WRITE && sh->cmd == STRIPE_READ)) { | |
12142 | PRINTK(("raid5: lock contention, rw == %d, sh->cmd == %d\n", rw, sh->cmd)); | |
12143 | lock_stripe(sh); | |
12144 | - if (!sh->nr_pending) | |
12145 | + if (!md_atomic_read(&sh->nr_pending)) | |
12146 | handle_stripe(sh); | |
12147 | goto repeat; | |
12148 | } | |
12149 | @@ -1221,24 +1303,24 @@ | |
12150 | printk("raid5: bug: stripe->bh_new[%d], sector %lu exists\n", dd_idx, sh->sector); | |
12151 | printk("raid5: bh %p, bh_new %p\n", bh, sh->bh_new[dd_idx]); | |
12152 | lock_stripe(sh); | |
12153 | - md_wakeup_thread(raid_conf->thread); | |
12154 | + md_wakeup_thread(conf->thread); | |
12155 | wait_on_stripe(sh); | |
12156 | goto repeat; | |
12157 | } | |
12158 | add_stripe_bh(sh, bh, dd_idx, rw); | |
12159 | ||
12160 | - md_wakeup_thread(raid_conf->thread); | |
12161 | + md_wakeup_thread(conf->thread); | |
12162 | return 0; | |
12163 | } | |
12164 | ||
12165 | static void unplug_devices(struct stripe_head *sh) | |
12166 | { | |
12167 | #if 0 | |
12168 | - struct raid5_data *raid_conf = sh->raid_conf; | |
12169 | + raid5_conf_t *conf = sh->raid_conf; | |
12170 | int i; | |
12171 | ||
12172 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
12173 | - unplug_device(blk_dev + MAJOR(raid_conf->disks[i].dev)); | |
12174 | + for (i = 0; i < conf->raid_disks; i++) | |
12175 | + unplug_device(blk_dev + MAJOR(conf->disks[i].dev)); | |
12176 | #endif | |
12177 | } | |
12178 | ||
12179 | @@ -1252,8 +1334,8 @@ | |
12180 | static void raid5d (void *data) | |
12181 | { | |
12182 | struct stripe_head *sh; | |
12183 | - struct raid5_data *raid_conf = data; | |
12184 | - struct md_dev *mddev = raid_conf->mddev; | |
12185 | + raid5_conf_t *conf = data; | |
12186 | + mddev_t *mddev = conf->mddev; | |
12187 | int i, handled = 0, unplug = 0; | |
12188 | unsigned long flags; | |
12189 | ||
12190 | @@ -1261,47 +1343,47 @@ | |
12191 | ||
12192 | if (mddev->sb_dirty) { | |
12193 | mddev->sb_dirty = 0; | |
12194 | - md_update_sb((int) (mddev - md_dev)); | |
12195 | + md_update_sb(mddev); | |
12196 | } | |
12197 | for (i = 0; i < NR_HASH; i++) { | |
12198 | repeat: | |
12199 | - sh = raid_conf->stripe_hashtbl[i]; | |
12200 | + sh = conf->stripe_hashtbl[i]; | |
12201 | for (; sh; sh = sh->hash_next) { | |
12202 | - if (sh->raid_conf != raid_conf) | |
12203 | + if (sh->raid_conf != conf) | |
12204 | continue; | |
12205 | if (sh->phase == PHASE_COMPLETE) | |
12206 | continue; | |
12207 | - if (sh->nr_pending) | |
12208 | + if (md_atomic_read(&sh->nr_pending)) | |
12209 | continue; | |
12210 | - if (sh->sector == raid_conf->next_sector) { | |
12211 | - raid_conf->sector_count += (sh->size >> 9); | |
12212 | - if (raid_conf->sector_count >= 128) | |
12213 | + if (sh->sector == conf->next_sector) { | |
12214 | + conf->sector_count += (sh->size >> 9); | |
12215 | + if (conf->sector_count >= 128) | |
12216 | unplug = 1; | |
12217 | } else | |
12218 | unplug = 1; | |
12219 | if (unplug) { | |
12220 | - PRINTK(("unplugging devices, sector == %lu, count == %d\n", sh->sector, raid_conf->sector_count)); | |
12221 | + PRINTK(("unplugging devices, sector == %lu, count == %d\n", sh->sector, conf->sector_count)); | |
12222 | unplug_devices(sh); | |
12223 | unplug = 0; | |
12224 | - raid_conf->sector_count = 0; | |
12225 | + conf->sector_count = 0; | |
12226 | } | |
12227 | - raid_conf->next_sector = sh->sector + (sh->size >> 9); | |
12228 | + conf->next_sector = sh->sector + (sh->size >> 9); | |
12229 | handled++; | |
12230 | handle_stripe(sh); | |
12231 | goto repeat; | |
12232 | } | |
12233 | } | |
12234 | - if (raid_conf) { | |
12235 | - PRINTK(("%d stripes handled, nr_handle %d\n", handled, atomic_read(&raid_conf->nr_handle))); | |
12236 | + if (conf) { | |
12237 | + PRINTK(("%d stripes handled, nr_handle %d\n", handled, md_atomic_read(&conf->nr_handle))); | |
12238 | save_flags(flags); | |
12239 | cli(); | |
12240 | - if (!atomic_read(&raid_conf->nr_handle)) | |
12241 | - clear_bit(THREAD_WAKEUP, &raid_conf->thread->flags); | |
12242 | + if (!md_atomic_read(&conf->nr_handle)) | |
12243 | + clear_bit(THREAD_WAKEUP, &conf->thread->flags); | |
12244 | + restore_flags(flags); | |
12245 | } | |
12246 | PRINTK(("--- raid5d inactive\n")); | |
12247 | } | |
12248 | ||
12249 | -#if SUPPORT_RECONSTRUCTION | |
12250 | /* | |
12251 | * Private kernel thread for parity reconstruction after an unclean | |
12252 | * shutdown. Reconstruction on spare drives in case of a failed drive | |
12253 | @@ -1309,44 +1391,64 @@ | |
12254 | */ | |
12255 | static void raid5syncd (void *data) | |
12256 | { | |
12257 | - struct raid5_data *raid_conf = data; | |
12258 | - struct md_dev *mddev = raid_conf->mddev; | |
12259 | + raid5_conf_t *conf = data; | |
12260 | + mddev_t *mddev = conf->mddev; | |
12261 | ||
12262 | - if (!raid_conf->resync_parity) | |
12263 | + if (!conf->resync_parity) | |
12264 | + return; | |
12265 | + if (conf->resync_parity == 2) | |
12266 | + return; | |
12267 | + down(&mddev->recovery_sem); | |
12268 | + if (md_do_sync(mddev, NULL)) { | |
12269 | + up(&mddev->recovery_sem); | |
12270 | + printk("raid5: resync aborted!\n"); | |
12271 | return; | |
12272 | - md_do_sync(mddev); | |
12273 | - raid_conf->resync_parity = 0; | |
12274 | + } | |
12275 | + conf->resync_parity = 0; | |
12276 | + up(&mddev->recovery_sem); | |
12277 | + printk("raid5: resync finished.\n"); | |
12278 | } | |
12279 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
12280 | ||
12281 | -static int __check_consistency (struct md_dev *mddev, int row) | |
12282 | +static int __check_consistency (mddev_t *mddev, int row) | |
12283 | { | |
12284 | - struct raid5_data *raid_conf = mddev->private; | |
12285 | + raid5_conf_t *conf = mddev->private; | |
12286 | kdev_t dev; | |
12287 | struct buffer_head *bh[MD_SB_DISKS], tmp; | |
12288 | - int i, rc = 0, nr = 0; | |
12289 | + int i, rc = 0, nr = 0, count; | |
12290 | + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; | |
12291 | ||
12292 | - if (raid_conf->working_disks != raid_conf->raid_disks) | |
12293 | + if (conf->working_disks != conf->raid_disks) | |
12294 | return 0; | |
12295 | tmp.b_size = 4096; | |
12296 | if ((tmp.b_data = (char *) get_free_page(GFP_KERNEL)) == NULL) | |
12297 | return 0; | |
12298 | + md_clear_page((unsigned long)tmp.b_data); | |
12299 | memset(bh, 0, MD_SB_DISKS * sizeof(struct buffer_head *)); | |
12300 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
12301 | - dev = raid_conf->disks[i].dev; | |
12302 | + for (i = 0; i < conf->raid_disks; i++) { | |
12303 | + dev = conf->disks[i].dev; | |
12304 | set_blocksize(dev, 4096); | |
12305 | if ((bh[i] = bread(dev, row / 4, 4096)) == NULL) | |
12306 | break; | |
12307 | nr++; | |
12308 | } | |
12309 | - if (nr == raid_conf->raid_disks) { | |
12310 | - for (i = 1; i < nr; i++) | |
12311 | - xor_block(&tmp, bh[i]); | |
12312 | + if (nr == conf->raid_disks) { | |
12313 | + bh_ptr[0] = &tmp; | |
12314 | + count = 1; | |
12315 | + for (i = 1; i < nr; i++) { | |
12316 | + bh_ptr[count++] = bh[i]; | |
12317 | + if (count == MAX_XOR_BLOCKS) { | |
12318 | + xor_block(count, &bh_ptr[0]); | |
12319 | + count = 1; | |
12320 | + } | |
12321 | + } | |
12322 | + if (count != 1) { | |
12323 | + xor_block(count, &bh_ptr[0]); | |
12324 | + } | |
12325 | if (memcmp(tmp.b_data, bh[0]->b_data, 4096)) | |
12326 | rc = 1; | |
12327 | } | |
12328 | - for (i = 0; i < raid_conf->raid_disks; i++) { | |
12329 | - dev = raid_conf->disks[i].dev; | |
12330 | + for (i = 0; i < conf->raid_disks; i++) { | |
12331 | + dev = conf->disks[i].dev; | |
12332 | if (bh[i]) { | |
12333 | bforget(bh[i]); | |
12334 | bh[i] = NULL; | |
12335 | @@ -1358,285 +1460,607 @@ | |
12336 | return rc; | |
12337 | } | |
12338 | ||
12339 | -static int check_consistency (struct md_dev *mddev) | |
12340 | +static int check_consistency (mddev_t *mddev) | |
12341 | { | |
12342 | - int size = mddev->sb->size; | |
12343 | - int row; | |
12344 | + if (__check_consistency(mddev, 0)) | |
12345 | +/* | |
12346 | + * We are not checking this currently, as it's legitimate to have | |
12347 | + * an inconsistent array, at creation time. | |
12348 | + */ | |
12349 | + return 0; | |
12350 | ||
12351 | - for (row = 0; row < size; row += size / 8) | |
12352 | - if (__check_consistency(mddev, row)) | |
12353 | - return 1; | |
12354 | return 0; | |
12355 | } | |
12356 | ||
12357 | -static int raid5_run (int minor, struct md_dev *mddev) | |
12358 | +static int raid5_run (mddev_t *mddev) | |
12359 | { | |
12360 | - struct raid5_data *raid_conf; | |
12361 | + raid5_conf_t *conf; | |
12362 | int i, j, raid_disk, memory; | |
12363 | - md_superblock_t *sb = mddev->sb; | |
12364 | - md_descriptor_t *descriptor; | |
12365 | - struct real_dev *realdev; | |
12366 | + mdp_super_t *sb = mddev->sb; | |
12367 | + mdp_disk_t *desc; | |
12368 | + mdk_rdev_t *rdev; | |
12369 | + struct disk_info *disk; | |
12370 | + struct md_list_head *tmp; | |
12371 | + int start_recovery = 0; | |
12372 | ||
12373 | MOD_INC_USE_COUNT; | |
12374 | ||
12375 | if (sb->level != 5 && sb->level != 4) { | |
12376 | - printk("raid5: %s: raid level not set to 4/5 (%d)\n", kdevname(MKDEV(MD_MAJOR, minor)), sb->level); | |
12377 | + printk("raid5: md%d: raid level not set to 4/5 (%d)\n", mdidx(mddev), sb->level); | |
12378 | MOD_DEC_USE_COUNT; | |
12379 | return -EIO; | |
12380 | } | |
12381 | ||
12382 | - mddev->private = kmalloc (sizeof (struct raid5_data), GFP_KERNEL); | |
12383 | - if ((raid_conf = mddev->private) == NULL) | |
12384 | + mddev->private = kmalloc (sizeof (raid5_conf_t), GFP_KERNEL); | |
12385 | + if ((conf = mddev->private) == NULL) | |
12386 | goto abort; | |
12387 | - memset (raid_conf, 0, sizeof (*raid_conf)); | |
12388 | - raid_conf->mddev = mddev; | |
12389 | + memset (conf, 0, sizeof (*conf)); | |
12390 | + conf->mddev = mddev; | |
12391 | ||
12392 | - if ((raid_conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) | |
12393 | + if ((conf->stripe_hashtbl = (struct stripe_head **) md__get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) | |
12394 | goto abort; | |
12395 | - memset(raid_conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); | |
12396 | + memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); | |
12397 | ||
12398 | - init_waitqueue(&raid_conf->wait_for_stripe); | |
12399 | - PRINTK(("raid5_run(%d) called.\n", minor)); | |
12400 | - | |
12401 | - for (i = 0; i < mddev->nb_dev; i++) { | |
12402 | - realdev = &mddev->devices[i]; | |
12403 | - if (!realdev->sb) { | |
12404 | - printk(KERN_ERR "raid5: disabled device %s (couldn't access raid superblock)\n", kdevname(realdev->dev)); | |
12405 | - continue; | |
12406 | - } | |
12407 | + init_waitqueue(&conf->wait_for_stripe); | |
12408 | + PRINTK(("raid5_run(md%d) called.\n", mdidx(mddev))); | |
12409 | ||
12410 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
12411 | /* | |
12412 | * This is important -- we are using the descriptor on | |
12413 | * the disk only to get a pointer to the descriptor on | |
12414 | * the main superblock, which might be more recent. | |
12415 | */ | |
12416 | - descriptor = &sb->disks[realdev->sb->descriptor.number]; | |
12417 | - if (descriptor->state & (1 << MD_FAULTY_DEVICE)) { | |
12418 | - printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", kdevname(realdev->dev)); | |
12419 | + desc = sb->disks + rdev->desc_nr; | |
12420 | + raid_disk = desc->raid_disk; | |
12421 | + disk = conf->disks + raid_disk; | |
12422 | + | |
12423 | + if (disk_faulty(desc)) { | |
12424 | + printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", partition_name(rdev->dev)); | |
12425 | + if (!rdev->faulty) { | |
12426 | + MD_BUG(); | |
12427 | + goto abort; | |
12428 | + } | |
12429 | + disk->number = desc->number; | |
12430 | + disk->raid_disk = raid_disk; | |
12431 | + disk->dev = rdev->dev; | |
12432 | + | |
12433 | + disk->operational = 0; | |
12434 | + disk->write_only = 0; | |
12435 | + disk->spare = 0; | |
12436 | + disk->used_slot = 1; | |
12437 | continue; | |
12438 | } | |
12439 | - if (descriptor->state & (1 << MD_ACTIVE_DEVICE)) { | |
12440 | - if (!(descriptor->state & (1 << MD_SYNC_DEVICE))) { | |
12441 | - printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", kdevname(realdev->dev)); | |
12442 | - continue; | |
12443 | + if (disk_active(desc)) { | |
12444 | + if (!disk_sync(desc)) { | |
12445 | + printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", partition_name(rdev->dev)); | |
12446 | + MD_BUG(); | |
12447 | + goto abort; | |
12448 | } | |
12449 | - raid_disk = descriptor->raid_disk; | |
12450 | - if (descriptor->number > sb->nr_disks || raid_disk > sb->raid_disks) { | |
12451 | - printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", kdevname(realdev->dev)); | |
12452 | + if (raid_disk > sb->raid_disks) { | |
12453 | + printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", partition_name(rdev->dev)); | |
12454 | continue; | |
12455 | } | |
12456 | - if (raid_conf->disks[raid_disk].operational) { | |
12457 | - printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", kdevname(realdev->dev), raid_disk); | |
12458 | + if (disk->operational) { | |
12459 | + printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", partition_name(rdev->dev), raid_disk); | |
12460 | continue; | |
12461 | } | |
12462 | - printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", kdevname(realdev->dev), raid_disk); | |
12463 | + printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", partition_name(rdev->dev), raid_disk); | |
12464 | ||
12465 | - raid_conf->disks[raid_disk].number = descriptor->number; | |
12466 | - raid_conf->disks[raid_disk].raid_disk = raid_disk; | |
12467 | - raid_conf->disks[raid_disk].dev = mddev->devices[i].dev; | |
12468 | - raid_conf->disks[raid_disk].operational = 1; | |
12469 | + disk->number = desc->number; | |
12470 | + disk->raid_disk = raid_disk; | |
12471 | + disk->dev = rdev->dev; | |
12472 | + disk->operational = 1; | |
12473 | + disk->used_slot = 1; | |
12474 | ||
12475 | - raid_conf->working_disks++; | |
12476 | + conf->working_disks++; | |
12477 | } else { | |
12478 | /* | |
12479 | * Must be a spare disk .. | |
12480 | */ | |
12481 | - printk(KERN_INFO "raid5: spare disk %s\n", kdevname(realdev->dev)); | |
12482 | - raid_disk = descriptor->raid_disk; | |
12483 | - raid_conf->disks[raid_disk].number = descriptor->number; | |
12484 | - raid_conf->disks[raid_disk].raid_disk = raid_disk; | |
12485 | - raid_conf->disks[raid_disk].dev = mddev->devices [i].dev; | |
12486 | - | |
12487 | - raid_conf->disks[raid_disk].operational = 0; | |
12488 | - raid_conf->disks[raid_disk].write_only = 0; | |
12489 | - raid_conf->disks[raid_disk].spare = 1; | |
12490 | - } | |
12491 | - } | |
12492 | - raid_conf->raid_disks = sb->raid_disks; | |
12493 | - raid_conf->failed_disks = raid_conf->raid_disks - raid_conf->working_disks; | |
12494 | - raid_conf->mddev = mddev; | |
12495 | - raid_conf->chunk_size = sb->chunk_size; | |
12496 | - raid_conf->level = sb->level; | |
12497 | - raid_conf->algorithm = sb->parity_algorithm; | |
12498 | - raid_conf->max_nr_stripes = NR_STRIPES; | |
12499 | + printk(KERN_INFO "raid5: spare disk %s\n", partition_name(rdev->dev)); | |
12500 | + disk->number = desc->number; | |
12501 | + disk->raid_disk = raid_disk; | |
12502 | + disk->dev = rdev->dev; | |
12503 | ||
12504 | - if (raid_conf->working_disks != sb->raid_disks && sb->state != (1 << MD_SB_CLEAN)) { | |
12505 | - printk(KERN_ALERT "raid5: raid set %s not clean and not all disks are operational -- run ckraid\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
12506 | - goto abort; | |
12507 | + disk->operational = 0; | |
12508 | + disk->write_only = 0; | |
12509 | + disk->spare = 1; | |
12510 | + disk->used_slot = 1; | |
12511 | + } | |
12512 | } | |
12513 | - if (!raid_conf->chunk_size || raid_conf->chunk_size % 4) { | |
12514 | - printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", raid_conf->chunk_size, kdevname(MKDEV(MD_MAJOR, minor))); | |
12515 | + | |
12516 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
12517 | + desc = sb->disks + i; | |
12518 | + raid_disk = desc->raid_disk; | |
12519 | + disk = conf->disks + raid_disk; | |
12520 | + | |
12521 | + if (disk_faulty(desc) && (raid_disk < sb->raid_disks) && | |
12522 | + !conf->disks[raid_disk].used_slot) { | |
12523 | + | |
12524 | + disk->number = desc->number; | |
12525 | + disk->raid_disk = raid_disk; | |
12526 | + disk->dev = MKDEV(0,0); | |
12527 | + | |
12528 | + disk->operational = 0; | |
12529 | + disk->write_only = 0; | |
12530 | + disk->spare = 0; | |
12531 | + disk->used_slot = 1; | |
12532 | + } | |
12533 | + } | |
12534 | + | |
12535 | + conf->raid_disks = sb->raid_disks; | |
12536 | + /* | |
12537 | + * 0 for a fully functional array, 1 for a degraded array. | |
12538 | + */ | |
12539 | + conf->failed_disks = conf->raid_disks - conf->working_disks; | |
12540 | + conf->mddev = mddev; | |
12541 | + conf->chunk_size = sb->chunk_size; | |
12542 | + conf->level = sb->level; | |
12543 | + conf->algorithm = sb->layout; | |
12544 | + conf->max_nr_stripes = NR_STRIPES; | |
12545 | + | |
12546 | +#if 0 | |
12547 | + for (i = 0; i < conf->raid_disks; i++) { | |
12548 | + if (!conf->disks[i].used_slot) { | |
12549 | + MD_BUG(); | |
12550 | + goto abort; | |
12551 | + } | |
12552 | + } | |
12553 | +#endif | |
12554 | + if (!conf->chunk_size || conf->chunk_size % 4) { | |
12555 | + printk(KERN_ERR "raid5: invalid chunk size %d for md%d\n", conf->chunk_size, mdidx(mddev)); | |
12556 | goto abort; | |
12557 | } | |
12558 | - if (raid_conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { | |
12559 | - printk(KERN_ERR "raid5: unsupported parity algorithm %d for %s\n", raid_conf->algorithm, kdevname(MKDEV(MD_MAJOR, minor))); | |
12560 | + if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) { | |
12561 | + printk(KERN_ERR "raid5: unsupported parity algorithm %d for md%d\n", conf->algorithm, mdidx(mddev)); | |
12562 | goto abort; | |
12563 | } | |
12564 | - if (raid_conf->failed_disks > 1) { | |
12565 | - printk(KERN_ERR "raid5: not enough operational devices for %s (%d/%d failed)\n", kdevname(MKDEV(MD_MAJOR, minor)), raid_conf->failed_disks, raid_conf->raid_disks); | |
12566 | + if (conf->failed_disks > 1) { | |
12567 | + printk(KERN_ERR "raid5: not enough operational devices for md%d (%d/%d failed)\n", mdidx(mddev), conf->failed_disks, conf->raid_disks); | |
12568 | goto abort; | |
12569 | } | |
12570 | ||
12571 | - if ((sb->state & (1 << MD_SB_CLEAN)) && check_consistency(mddev)) { | |
12572 | - printk(KERN_ERR "raid5: detected raid-5 xor inconsistenty -- run ckraid\n"); | |
12573 | - sb->state |= 1 << MD_SB_ERRORS; | |
12574 | - goto abort; | |
12575 | + if (conf->working_disks != sb->raid_disks) { | |
12576 | + printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); | |
12577 | + start_recovery = 1; | |
12578 | } | |
12579 | ||
12580 | - if ((raid_conf->thread = md_register_thread(raid5d, raid_conf)) == NULL) { | |
12581 | - printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
12582 | - goto abort; | |
12583 | + if (!start_recovery && (sb->state & (1 << MD_SB_CLEAN)) && | |
12584 | + check_consistency(mddev)) { | |
12585 | + printk(KERN_ERR "raid5: detected raid-5 superblock xor inconsistency -- running resync\n"); | |
12586 | + sb->state &= ~(1 << MD_SB_CLEAN); | |
12587 | } | |
12588 | ||
12589 | -#if SUPPORT_RECONSTRUCTION | |
12590 | - if ((raid_conf->resync_thread = md_register_thread(raid5syncd, raid_conf)) == NULL) { | |
12591 | - printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
12592 | - goto abort; | |
12593 | + { | |
12594 | + const char * name = "raid5d"; | |
12595 | + | |
12596 | + conf->thread = md_register_thread(raid5d, conf, name); | |
12597 | + if (!conf->thread) { | |
12598 | + printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev)); | |
12599 | + goto abort; | |
12600 | + } | |
12601 | } | |
12602 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
12603 | ||
12604 | - memory = raid_conf->max_nr_stripes * (sizeof(struct stripe_head) + | |
12605 | - raid_conf->raid_disks * (sizeof(struct buffer_head) + | |
12606 | + memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | |
12607 | + conf->raid_disks * (sizeof(struct buffer_head) + | |
12608 | 2 * (sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; | |
12609 | - if (grow_stripes(raid_conf, raid_conf->max_nr_stripes, GFP_KERNEL)) { | |
12610 | + if (grow_stripes(conf, conf->max_nr_stripes, GFP_KERNEL)) { | |
12611 | printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); | |
12612 | - shrink_stripes(raid_conf, raid_conf->max_nr_stripes); | |
12613 | + shrink_stripes(conf, conf->max_nr_stripes); | |
12614 | goto abort; | |
12615 | } else | |
12616 | - printk(KERN_INFO "raid5: allocated %dkB for %s\n", memory, kdevname(MKDEV(MD_MAJOR, minor))); | |
12617 | + printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev)); | |
12618 | ||
12619 | /* | |
12620 | * Regenerate the "device is in sync with the raid set" bit for | |
12621 | * each device. | |
12622 | */ | |
12623 | - for (i = 0; i < sb->nr_disks ; i++) { | |
12624 | - sb->disks[i].state &= ~(1 << MD_SYNC_DEVICE); | |
12625 | + for (i = 0; i < MD_SB_DISKS ; i++) { | |
12626 | + mark_disk_nonsync(sb->disks + i); | |
12627 | for (j = 0; j < sb->raid_disks; j++) { | |
12628 | - if (!raid_conf->disks[j].operational) | |
12629 | + if (!conf->disks[j].operational) | |
12630 | continue; | |
12631 | - if (sb->disks[i].number == raid_conf->disks[j].number) | |
12632 | - sb->disks[i].state |= 1 << MD_SYNC_DEVICE; | |
12633 | + if (sb->disks[i].number == conf->disks[j].number) | |
12634 | + mark_disk_sync(sb->disks + i); | |
12635 | } | |
12636 | } | |
12637 | - sb->active_disks = raid_conf->working_disks; | |
12638 | + sb->active_disks = conf->working_disks; | |
12639 | ||
12640 | if (sb->active_disks == sb->raid_disks) | |
12641 | - printk("raid5: raid level %d set %s active with %d out of %d devices, algorithm %d\n", raid_conf->level, kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks, raid_conf->algorithm); | |
12642 | + printk("raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); | |
12643 | else | |
12644 | - printk(KERN_ALERT "raid5: raid level %d set %s active with %d out of %d devices, algorithm %d\n", raid_conf->level, kdevname(MKDEV(MD_MAJOR, minor)), sb->active_disks, sb->raid_disks, raid_conf->algorithm); | |
12645 | + printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); | |
12646 | + | |
12647 | + if (!start_recovery && ((sb->state & (1 << MD_SB_CLEAN))==0)) { | |
12648 | + const char * name = "raid5syncd"; | |
12649 | + | |
12650 | + conf->resync_thread = md_register_thread(raid5syncd, conf,name); | |
12651 | + if (!conf->resync_thread) { | |
12652 | + printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev)); | |
12653 | + goto abort; | |
12654 | + } | |
12655 | ||
12656 | - if ((sb->state & (1 << MD_SB_CLEAN)) == 0) { | |
12657 | - printk("raid5: raid set %s not clean; re-constructing parity\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
12658 | - raid_conf->resync_parity = 1; | |
12659 | -#if SUPPORT_RECONSTRUCTION | |
12660 | - md_wakeup_thread(raid_conf->resync_thread); | |
12661 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
12662 | + printk("raid5: raid set md%d not clean; reconstructing parity\n", mdidx(mddev)); | |
12663 | + conf->resync_parity = 1; | |
12664 | + md_wakeup_thread(conf->resync_thread); | |
12665 | } | |
12666 | ||
12667 | + print_raid5_conf(conf); | |
12668 | + if (start_recovery) | |
12669 | + md_recover_arrays(); | |
12670 | + print_raid5_conf(conf); | |
12671 | + | |
12672 | /* Ok, everything is just fine now */ | |
12673 | return (0); | |
12674 | abort: | |
12675 | - if (raid_conf) { | |
12676 | - if (raid_conf->stripe_hashtbl) | |
12677 | - free_pages((unsigned long) raid_conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
12678 | - kfree(raid_conf); | |
12679 | + if (conf) { | |
12680 | + print_raid5_conf(conf); | |
12681 | + if (conf->stripe_hashtbl) | |
12682 | + free_pages((unsigned long) conf->stripe_hashtbl, | |
12683 | + HASH_PAGES_ORDER); | |
12684 | + kfree(conf); | |
12685 | } | |
12686 | mddev->private = NULL; | |
12687 | - printk(KERN_ALERT "raid5: failed to run raid set %s\n", kdevname(MKDEV(MD_MAJOR, minor))); | |
12688 | + printk(KERN_ALERT "raid5: failed to run raid set md%d\n", mdidx(mddev)); | |
12689 | MOD_DEC_USE_COUNT; | |
12690 | return -EIO; | |
12691 | } | |
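Editor's note: condensed, the start-up policy raid5_run() implements above is: more than one failed disk aborts the start; a single missing disk triggers array recovery; and an array marked clean that fails the parity check is merely demoted to "not clean" so that raid5syncd rewrites parity. The toy function below restates that decision tree in plain C purely as a reading aid; the names are invented and this is not part of the patch.

enum raid5_start { START_ABORT, START_RECOVERY, START_RESYNC, START_CLEAN };

/* Illustrative restatement of the raid5_run() start-up decisions. */
enum raid5_start raid5_start_policy(int raid_disks, int working_disks,
				    int failed_disks, int sb_clean,
				    int xor_consistent)
{
	if (failed_disks > 1)
		return START_ABORT;		/* not enough operational devices */
	if (working_disks != raid_disks)
		return START_RECOVERY;		/* md_recover_arrays() path */
	if (sb_clean && !xor_consistent)
		sb_clean = 0;			/* demote to dirty, resync below */
	if (!sb_clean)
		return START_RESYNC;		/* raid5syncd reconstructs parity */
	return START_CLEAN;
}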
12692 | ||
12693 | -static int raid5_stop (int minor, struct md_dev *mddev) | |
12694 | +static int raid5_stop_resync (mddev_t *mddev) | |
12695 | +{ | |
12696 | + raid5_conf_t *conf = mddev_to_conf(mddev); | |
12697 | + mdk_thread_t *thread = conf->resync_thread; | |
12698 | + | |
12699 | + if (thread) { | |
12700 | + if (conf->resync_parity) { | |
12701 | + conf->resync_parity = 2; | |
12702 | + md_interrupt_thread(thread); | |
12703 | + printk(KERN_INFO "raid5: parity resync was not fully finished, restarting next time.\n"); | |
12704 | + return 1; | |
12705 | + } | |
12706 | + return 0; | |
12707 | + } | |
12708 | + return 0; | |
12709 | +} | |
12710 | + | |
12711 | +static int raid5_restart_resync (mddev_t *mddev) | |
12712 | { | |
12713 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
12714 | + raid5_conf_t *conf = mddev_to_conf(mddev); | |
12715 | ||
12716 | - shrink_stripe_cache(raid_conf, raid_conf->max_nr_stripes); | |
12717 | - shrink_stripes(raid_conf, raid_conf->max_nr_stripes); | |
12718 | - md_unregister_thread(raid_conf->thread); | |
12719 | -#if SUPPORT_RECONSTRUCTION | |
12720 | - md_unregister_thread(raid_conf->resync_thread); | |
12721 | -#endif /* SUPPORT_RECONSTRUCTION */ | |
12722 | - free_pages((unsigned long) raid_conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
12723 | - kfree(raid_conf); | |
12724 | + if (conf->resync_parity) { | |
12725 | + if (!conf->resync_thread) { | |
12726 | + MD_BUG(); | |
12727 | + return 0; | |
12728 | + } | |
12729 | + printk("raid5: waking up raid5resync.\n"); | |
12730 | + conf->resync_parity = 1; | |
12731 | + md_wakeup_thread(conf->resync_thread); | |
12732 | + return 1; | |
12733 | + } else | |
12734 | + printk("raid5: no restart-resync needed.\n"); | |
12735 | + return 0; | |
12736 | +} | |
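Editor's note: the stop/restart pair above is a small state machine on conf->resync_parity (0: no parity resync, 1: resync running, 2: resync interrupted, restart later). The stand-alone sketch below models only that handshake; the struct and function names are invented for illustration and are not kernel code.

#include <stdio.h>

struct fake_conf {
	int resync_parity;	/* 0: idle, 1: running, 2: interrupted */
	int thread_running;	/* stands in for conf->resync_thread */
};

/* returns 1 if a resync was in progress and must be restarted later */
int model_stop_resync(struct fake_conf *c)
{
	if (c->thread_running && c->resync_parity) {
		c->resync_parity = 2;	/* remember the interruption */
		printf("resync interrupted, restart next time\n");
		return 1;
	}
	return 0;
}

/* returns 1 if a previously interrupted resync is kicked off again */
int model_restart_resync(struct fake_conf *c)
{
	if (c->resync_parity) {
		c->resync_parity = 1;	/* back to "running" */
		printf("waking up resync\n");
		return 1;
	}
	printf("no restart-resync needed\n");
	return 0;
}

int main(void)
{
	struct fake_conf c = { 1, 1 };	/* resync currently running */
	model_stop_resync(&c);		/* e.g. the array is being stopped */
	model_restart_resync(&c);	/* resume when it comes back */
	return 0;
}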
12737 | + | |
12738 | + | |
12739 | +static int raid5_stop (mddev_t *mddev) | |
12740 | +{ | |
12741 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
12742 | + | |
12743 | + shrink_stripe_cache(conf, conf->max_nr_stripes); | |
12744 | + shrink_stripes(conf, conf->max_nr_stripes); | |
12745 | + md_unregister_thread(conf->thread); | |
12746 | + if (conf->resync_thread) | |
12747 | + md_unregister_thread(conf->resync_thread); | |
12748 | + free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); | |
12749 | + kfree(conf); | |
12750 | mddev->private = NULL; | |
12751 | MOD_DEC_USE_COUNT; | |
12752 | return 0; | |
12753 | } | |
12754 | ||
12755 | -static int raid5_status (char *page, int minor, struct md_dev *mddev) | |
12756 | +static int raid5_status (char *page, mddev_t *mddev) | |
12757 | { | |
12758 | - struct raid5_data *raid_conf = (struct raid5_data *) mddev->private; | |
12759 | - md_superblock_t *sb = mddev->sb; | |
12760 | + raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | |
12761 | + mdp_super_t *sb = mddev->sb; | |
12762 | int sz = 0, i; | |
12763 | ||
12764 | - sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->parity_algorithm); | |
12765 | - sz += sprintf (page+sz, " [%d/%d] [", raid_conf->raid_disks, raid_conf->working_disks); | |
12766 | - for (i = 0; i < raid_conf->raid_disks; i++) | |
12767 | - sz += sprintf (page+sz, "%s", raid_conf->disks[i].operational ? "U" : "_"); | |
12768 | + sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", sb->level, sb->chunk_size >> 10, sb->layout); | |
12769 | + sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, conf->working_disks); | |
12770 | + for (i = 0; i < conf->raid_disks; i++) | |
12771 | + sz += sprintf (page+sz, "%s", conf->disks[i].operational ? "U" : "_"); | |
12772 | sz += sprintf (page+sz, "]"); | |
12773 | return sz; | |
12774 | } | |
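Editor's note: with the format strings above, the raid5 status fragment comes out roughly as follows (numbers invented for illustration; the bracket shows one 'U' per operational disk and '_' per failed slot):

 level 5, 64k chunk, algorithm 2 [4/3] [UU_U]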
12775 | ||
12776 | -static int raid5_mark_spare(struct md_dev *mddev, md_descriptor_t *spare, int state) | |
12777 | +static void print_raid5_conf (raid5_conf_t *conf) | |
12778 | +{ | |
12779 | + int i; | |
12780 | + struct disk_info *tmp; | |
12781 | + | |
12782 | + printk("RAID5 conf printout:\n"); | |
12783 | + if (!conf) { | |
12784 | + printk("(conf==NULL)\n"); | |
12785 | + return; | |
12786 | + } | |
12787 | + printk(" --- rd:%d wd:%d fd:%d\n", conf->raid_disks, | |
12788 | + conf->working_disks, conf->failed_disks); | |
12789 | + | |
12790 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
12791 | + tmp = conf->disks + i; | |
12792 | + printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", | |
12793 | + i, tmp->spare,tmp->operational, | |
12794 | + tmp->number,tmp->raid_disk,tmp->used_slot, | |
12795 | + partition_name(tmp->dev)); | |
12796 | + } | |
12797 | +} | |
12798 | + | |
12799 | +static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) | |
12800 | { | |
12801 | - int i = 0, failed_disk = -1; | |
12802 | - struct raid5_data *raid_conf = mddev->private; | |
12803 | - struct disk_info *disk = raid_conf->disks; | |
12804 | + int err = 0; | |
12805 | + int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; | |
12806 | + raid5_conf_t *conf = mddev->private; | |
12807 | + struct disk_info *tmp, *sdisk, *fdisk, *rdisk, *adisk; | |
12808 | unsigned long flags; | |
12809 | - md_superblock_t *sb = mddev->sb; | |
12810 | - md_descriptor_t *descriptor; | |
12811 | + mdp_super_t *sb = mddev->sb; | |
12812 | + mdp_disk_t *failed_desc, *spare_desc, *added_desc; | |
12813 | ||
12814 | - for (i = 0; i < MD_SB_DISKS; i++, disk++) { | |
12815 | - if (disk->spare && disk->number == spare->number) | |
12816 | - goto found; | |
12817 | - } | |
12818 | - return 1; | |
12819 | -found: | |
12820 | - for (i = 0, disk = raid_conf->disks; i < raid_conf->raid_disks; i++, disk++) | |
12821 | - if (!disk->operational) | |
12822 | - failed_disk = i; | |
12823 | - if (failed_disk == -1) | |
12824 | - return 1; | |
12825 | save_flags(flags); | |
12826 | cli(); | |
12827 | + | |
12828 | + print_raid5_conf(conf); | |
12829 | + /* | |
12830 | + * find the disk ... | |
12831 | + */ | |
12832 | switch (state) { | |
12833 | - case SPARE_WRITE: | |
12834 | - disk->operational = 1; | |
12835 | - disk->write_only = 1; | |
12836 | - raid_conf->spare = disk; | |
12837 | - break; | |
12838 | - case SPARE_INACTIVE: | |
12839 | - disk->operational = 0; | |
12840 | - disk->write_only = 0; | |
12841 | - raid_conf->spare = NULL; | |
12842 | - break; | |
12843 | - case SPARE_ACTIVE: | |
12844 | - disk->spare = 0; | |
12845 | - disk->write_only = 0; | |
12846 | ||
12847 | - descriptor = &sb->disks[raid_conf->disks[failed_disk].number]; | |
12848 | - i = spare->raid_disk; | |
12849 | - disk->raid_disk = spare->raid_disk = descriptor->raid_disk; | |
12850 | - if (disk->raid_disk != failed_disk) | |
12851 | - printk("raid5: disk->raid_disk != failed_disk"); | |
12852 | - descriptor->raid_disk = i; | |
12853 | - | |
12854 | - raid_conf->spare = NULL; | |
12855 | - raid_conf->working_disks++; | |
12856 | - raid_conf->failed_disks--; | |
12857 | - raid_conf->disks[failed_disk] = *disk; | |
12858 | - break; | |
12859 | - default: | |
12860 | - printk("raid5_mark_spare: bug: state == %d\n", state); | |
12861 | - restore_flags(flags); | |
12862 | - return 1; | |
12863 | + case DISKOP_SPARE_ACTIVE: | |
12864 | + | |
12865 | + /* | |
12866 | + * Find the failed disk within the RAID5 configuration ... | |
12867 | + * (this can only be in the first conf->raid_disks part) | |
12868 | + */ | |
12869 | + for (i = 0; i < conf->raid_disks; i++) { | |
12870 | + tmp = conf->disks + i; | |
12871 | + if ((!tmp->operational && !tmp->spare) || | |
12872 | + !tmp->used_slot) { | |
12873 | + failed_disk = i; | |
12874 | + break; | |
12875 | + } | |
12876 | + } | |
12877 | + /* | |
12878 | + * When we activate a spare disk we _must_ have a disk in | |
12879 | + * the lower (active) part of the array to replace. | |
12880 | + */ | |
12881 | + if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) { | |
12882 | + MD_BUG(); | |
12883 | + err = 1; | |
12884 | + goto abort; | |
12885 | + } | |
12886 | + /* fall through */ | |
12887 | + | |
12888 | + case DISKOP_SPARE_WRITE: | |
12889 | + case DISKOP_SPARE_INACTIVE: | |
12890 | + | |
12891 | + /* | |
12892 | + * Find the spare disk ... (can only be in the 'high' | |
12893 | + * area of the array) | |
12894 | + */ | |
12895 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
12896 | + tmp = conf->disks + i; | |
12897 | + if (tmp->spare && tmp->number == (*d)->number) { | |
12898 | + spare_disk = i; | |
12899 | + break; | |
12900 | + } | |
12901 | + } | |
12902 | + if (spare_disk == -1) { | |
12903 | + MD_BUG(); | |
12904 | + err = 1; | |
12905 | + goto abort; | |
12906 | + } | |
12907 | + break; | |
12908 | + | |
12909 | + case DISKOP_HOT_REMOVE_DISK: | |
12910 | + | |
12911 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
12912 | + tmp = conf->disks + i; | |
12913 | + if (tmp->used_slot && (tmp->number == (*d)->number)) { | |
12914 | + if (tmp->operational) { | |
12915 | + err = -EBUSY; | |
12916 | + goto abort; | |
12917 | + } | |
12918 | + removed_disk = i; | |
12919 | + break; | |
12920 | + } | |
12921 | + } | |
12922 | + if (removed_disk == -1) { | |
12923 | + MD_BUG(); | |
12924 | + err = 1; | |
12925 | + goto abort; | |
12926 | + } | |
12927 | + break; | |
12928 | + | |
12929 | + case DISKOP_HOT_ADD_DISK: | |
12930 | + | |
12931 | + for (i = conf->raid_disks; i < MD_SB_DISKS; i++) { | |
12932 | + tmp = conf->disks + i; | |
12933 | + if (!tmp->used_slot) { | |
12934 | + added_disk = i; | |
12935 | + break; | |
12936 | + } | |
12937 | + } | |
12938 | + if (added_disk == -1) { | |
12939 | + MD_BUG(); | |
12940 | + err = 1; | |
12941 | + goto abort; | |
12942 | + } | |
12943 | + break; | |
12944 | + } | |
12945 | + | |
12946 | + switch (state) { | |
12947 | + /* | |
12948 | + * Switch the spare disk to write-only mode: | |
12949 | + */ | |
12950 | + case DISKOP_SPARE_WRITE: | |
12951 | + if (conf->spare) { | |
12952 | + MD_BUG(); | |
12953 | + err = 1; | |
12954 | + goto abort; | |
12955 | + } | |
12956 | + sdisk = conf->disks + spare_disk; | |
12957 | + sdisk->operational = 1; | |
12958 | + sdisk->write_only = 1; | |
12959 | + conf->spare = sdisk; | |
12960 | + break; | |
12961 | + /* | |
12962 | + * Deactivate a spare disk: | |
12963 | + */ | |
12964 | + case DISKOP_SPARE_INACTIVE: | |
12965 | + sdisk = conf->disks + spare_disk; | |
12966 | + sdisk->operational = 0; | |
12967 | + sdisk->write_only = 0; | |
12968 | + /* | |
12969 | + * Was the spare being resynced? | |
12970 | + */ | |
12971 | + if (conf->spare == sdisk) | |
12972 | + conf->spare = NULL; | |
12973 | + break; | |
12974 | + /* | |
12975 | + * Activate (mark read-write) the (now sync) spare disk, | |
12976 | + * which means we switch its 'raid position' (->raid_disk) | |
12977 | + * with the failed disk. (only the first 'conf->raid_disks' | |
12978 | + * slots are used for 'real' disks and we must preserve this | |
12979 | + * property) | |
12980 | + */ | |
12981 | + case DISKOP_SPARE_ACTIVE: | |
12982 | + if (!conf->spare) { | |
12983 | + MD_BUG(); | |
12984 | + err = 1; | |
12985 | + goto abort; | |
12986 | + } | |
12987 | + sdisk = conf->disks + spare_disk; | |
12988 | + fdisk = conf->disks + failed_disk; | |
12989 | + | |
12990 | + spare_desc = &sb->disks[sdisk->number]; | |
12991 | + failed_desc = &sb->disks[fdisk->number]; | |
12992 | + | |
12993 | + if (spare_desc != *d) { | |
12994 | + MD_BUG(); | |
12995 | + err = 1; | |
12996 | + goto abort; | |
12997 | + } | |
12998 | + | |
12999 | + if (spare_desc->raid_disk != sdisk->raid_disk) { | |
13000 | + MD_BUG(); | |
13001 | + err = 1; | |
13002 | + goto abort; | |
13003 | + } | |
13004 | + | |
13005 | + if (sdisk->raid_disk != spare_disk) { | |
13006 | + MD_BUG(); | |
13007 | + err = 1; | |
13008 | + goto abort; | |
13009 | + } | |
13010 | + | |
13011 | + if (failed_desc->raid_disk != fdisk->raid_disk) { | |
13012 | + MD_BUG(); | |
13013 | + err = 1; | |
13014 | + goto abort; | |
13015 | + } | |
13016 | + | |
13017 | + if (fdisk->raid_disk != failed_disk) { | |
13018 | + MD_BUG(); | |
13019 | + err = 1; | |
13020 | + goto abort; | |
13021 | + } | |
13022 | + | |
13023 | + /* | |
13024 | + * do the switch finally | |
13025 | + */ | |
13026 | + xchg_values(*spare_desc, *failed_desc); | |
13027 | + xchg_values(*fdisk, *sdisk); | |
13028 | + | |
13029 | + /* | |
13030 | + * (careful, 'failed' and 'spare' are switched from now on) | |
13031 | + * | |
13032 | + * we want to preserve linear numbering and we want to | |
13033 | + * give the proper raid_disk number to the now activated | |
13034 | + * disk. (this means we switch back these values) | |
13035 | + */ | |
13036 | + | |
13037 | + xchg_values(spare_desc->raid_disk, failed_desc->raid_disk); | |
13038 | + xchg_values(sdisk->raid_disk, fdisk->raid_disk); | |
13039 | + xchg_values(spare_desc->number, failed_desc->number); | |
13040 | + xchg_values(sdisk->number, fdisk->number); | |
13041 | + | |
13042 | + *d = failed_desc; | |
13043 | + | |
13044 | + if (sdisk->dev == MKDEV(0,0)) | |
13045 | + sdisk->used_slot = 0; | |
13046 | + | |
13047 | + /* | |
13048 | + * this really activates the spare. | |
13049 | + */ | |
13050 | + fdisk->spare = 0; | |
13051 | + fdisk->write_only = 0; | |
13052 | + | |
13053 | + /* | |
13054 | + * if we activate a spare, we definitely replace a | |
13055 | + * non-operational disk slot in the 'low' area of | |
13056 | + * the disk array. | |
13057 | + */ | |
13058 | + conf->failed_disks--; | |
13059 | + conf->working_disks++; | |
13060 | + conf->spare = NULL; | |
13061 | + | |
13062 | + break; | |
13063 | + | |
13064 | + case DISKOP_HOT_REMOVE_DISK: | |
13065 | + rdisk = conf->disks + removed_disk; | |
13066 | + | |
13067 | + if (rdisk->spare && (removed_disk < conf->raid_disks)) { | |
13068 | + MD_BUG(); | |
13069 | + err = 1; | |
13070 | + goto abort; | |
13071 | + } | |
13072 | + rdisk->dev = MKDEV(0,0); | |
13073 | + rdisk->used_slot = 0; | |
13074 | + | |
13075 | + break; | |
13076 | + | |
13077 | + case DISKOP_HOT_ADD_DISK: | |
13078 | + adisk = conf->disks + added_disk; | |
13079 | + added_desc = *d; | |
13080 | + | |
13081 | + if (added_disk != added_desc->number) { | |
13082 | + MD_BUG(); | |
13083 | + err = 1; | |
13084 | + goto abort; | |
13085 | + } | |
13086 | + | |
13087 | + adisk->number = added_desc->number; | |
13088 | + adisk->raid_disk = added_desc->raid_disk; | |
13089 | + adisk->dev = MKDEV(added_desc->major,added_desc->minor); | |
13090 | + | |
13091 | + adisk->operational = 0; | |
13092 | + adisk->write_only = 0; | |
13093 | + adisk->spare = 1; | |
13094 | + adisk->used_slot = 1; | |
13095 | + | |
13096 | + | |
13097 | + break; | |
13098 | + | |
13099 | + default: | |
13100 | + MD_BUG(); | |
13101 | + err = 1; | |
13102 | + goto abort; | |
13103 | } | |
13104 | +abort: | |
13105 | restore_flags(flags); | |
13106 | - return 0; | |
13107 | + print_raid5_conf(conf); | |
13108 | + return err; | |
13109 | } | |
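Editor's note: the DISKOP_SPARE_ACTIVE branch above swaps the whole spare and failed descriptors with xchg_values() and then swaps the raid_disk and number fields back, so the now-active disk inherits the failed slot's position while the table keeps its linear numbering. The self-contained sketch below replays that double swap on an invented struct; the macro is redefined here only so the example compiles on its own.

#include <stdio.h>

#define xchg_values(a, b) do { typeof(a) __t = (a); (a) = (b); (b) = __t; } while (0)

struct slot {
	int number;	/* position in the superblock disk table */
	int raid_disk;	/* logical role in the array */
	char state;	/* 'F' failed, 'S' spare */
};

int main(void)
{
	struct slot failed = { 2, 2, 'F' };
	struct slot spare  = { 5, 5, 'S' };

	/* switch the whole descriptors ... */
	xchg_values(failed, spare);
	/* ... then give each side back its original slot number and role,
	 * so numbering stays linear while the spare's payload now lives
	 * in the failed disk's slot (note the variables have swapped
	 * meanings from here on, just as the patch comment warns). */
	xchg_values(failed.raid_disk, spare.raid_disk);
	xchg_values(failed.number, spare.number);

	printf("slot 2 now holds state '%c' (number %d, raid_disk %d)\n",
	       failed.state, failed.number, failed.raid_disk);
	return 0;
}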
13110 | ||
13111 | -static struct md_personality raid5_personality= | |
13112 | +static mdk_personality_t raid5_personality= | |
13113 | { | |
13114 | "raid5", | |
13115 | raid5_map, | |
13116 | @@ -1648,14 +2072,19 @@ | |
13117 | NULL, /* no ioctls */ | |
13118 | 0, | |
13119 | raid5_error, | |
13120 | - /* raid5_hot_add_disk, */ NULL, | |
13121 | - /* raid1_hot_remove_drive */ NULL, | |
13122 | - raid5_mark_spare | |
13123 | + raid5_diskop, | |
13124 | + raid5_stop_resync, | |
13125 | + raid5_restart_resync | |
13126 | }; | |
13127 | ||
13128 | int raid5_init (void) | |
13129 | { | |
13130 | - return register_md_personality (RAID5, &raid5_personality); | |
13131 | + int err; | |
13132 | + | |
13133 | + err = register_md_personality (RAID5, &raid5_personality); | |
13134 | + if (err) | |
13135 | + return err; | |
13136 | + return 0; | |
13137 | } | |
13138 | ||
13139 | #ifdef MODULE | |
13140 | --- linux/drivers/block/translucent.c.orig Fri Nov 23 11:18:20 2001 | |
13141 | +++ linux/drivers/block/translucent.c Fri Nov 23 11:18:20 2001 | |
13142 | @@ -0,0 +1,136 @@ | |
13143 | +/* | |
13144 | + translucent.c : Translucent RAID driver for Linux | |
13145 | + Copyright (C) 1998 Ingo Molnar | |
13146 | + | |
13147 | + Translucent mode management functions. | |
13148 | + | |
13149 | + This program is free software; you can redistribute it and/or modify | |
13150 | + it under the terms of the GNU General Public License as published by | |
13151 | + the Free Software Foundation; either version 2, or (at your option) | |
13152 | + any later version. | |
13153 | + | |
13154 | + You should have received a copy of the GNU General Public License | |
13155 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
13156 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
13157 | +*/ | |
13158 | + | |
13159 | +#include <linux/module.h> | |
13160 | + | |
13161 | +#include <linux/raid/md.h> | |
13162 | +#include <linux/malloc.h> | |
13163 | + | |
13164 | +#include <linux/raid/translucent.h> | |
13165 | + | |
13166 | +#define MAJOR_NR MD_MAJOR | |
13167 | +#define MD_DRIVER | |
13168 | +#define MD_PERSONALITY | |
13169 | + | |
13170 | +static int translucent_run (mddev_t *mddev) | |
13171 | +{ | |
13172 | + translucent_conf_t *conf; | |
13173 | + mdk_rdev_t *rdev; | |
13174 | + int i; | |
13175 | + | |
13176 | + MOD_INC_USE_COUNT; | |
13177 | + | |
13178 | + conf = kmalloc (sizeof (*conf), GFP_KERNEL); | |
13179 | + if (!conf) | |
13180 | + goto out; | |
13181 | + mddev->private = conf; | |
13182 | + | |
13183 | + if (mddev->nb_dev != 2) { | |
13184 | + printk("translucent: this mode needs 2 disks, aborting!\n"); | |
13185 | + goto out; | |
13186 | + } | |
13187 | + | |
13188 | + if (md_check_ordering(mddev)) { | |
13189 | + printk("translucent: disks are not ordered, aborting!\n"); | |
13190 | + goto out; | |
13191 | + } | |
13192 | + | |
13193 | + ITERATE_RDEV_ORDERED(mddev,rdev,i) { | |
13194 | + dev_info_t *disk = conf->disks + i; | |
13195 | + | |
13196 | + disk->dev = rdev->dev; | |
13197 | + disk->size = rdev->size; | |
13198 | + } | |
13199 | + | |
13200 | + return 0; | |
13201 | + | |
13202 | +out: | |
13203 | + if (conf) | |
13204 | + kfree(conf); | |
13205 | + | |
13206 | + MOD_DEC_USE_COUNT; | |
13207 | + return 1; | |
13208 | +} | |
13209 | + | |
13210 | +static int translucent_stop (mddev_t *mddev) | |
13211 | +{ | |
13212 | + translucent_conf_t *conf = mddev_to_conf(mddev); | |
13213 | + | |
13214 | + kfree(conf); | |
13215 | + | |
13216 | + MOD_DEC_USE_COUNT; | |
13217 | + | |
13218 | + return 0; | |
13219 | +} | |
13220 | + | |
13221 | + | |
13222 | +static int translucent_map (mddev_t *mddev, kdev_t dev, kdev_t *rdev, | |
13223 | + unsigned long *rsector, unsigned long size) | |
13224 | +{ | |
13225 | + translucent_conf_t *conf = mddev_to_conf(mddev); | |
13226 | + | |
13227 | + *rdev = conf->disks[0].dev; | |
13228 | + | |
13229 | + return 0; | |
13230 | +} | |
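Editor's note: as the stub above shows, translucent_map() currently forwards every request to the first of the two devices and leaves *rsector untouched. A complete translucent (copy-on-write overlay) mode would presumably consult per-block state to choose between the two disks; the sketch below models that idea with an invented bitmap, purely to illustrate the mapping contract, and none of it is in the patch.

#include <stdio.h>
#include <stdint.h>

static uint32_t written;	/* hypothetical: bit i set => block i written to disk 0 */

/* pick which of the two disks a read of 'block' should go to */
int translucent_pick(unsigned block)
{
	if (written & (1u << block))
		return 0;	/* the overlay disk holds newer data */
	return 1;		/* fall through to the read-only base disk */
}

int main(void)
{
	written |= 1u << 3;	/* pretend block 3 was written via the overlay */
	printf("block 3 -> disk %d\n", translucent_pick(3));	/* disk 0 */
	printf("block 7 -> disk %d\n", translucent_pick(7));	/* disk 1 */
	return 0;
}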
13231 | + | |
13232 | +static int translucent_status (char *page, mddev_t *mddev) | |
13233 | +{ | |
13234 | + int sz = 0; | |
13235 | + | |
13236 | + sz += sprintf(page+sz, " %d%% full", 10); | |
13237 | + return sz; | |
13238 | +} | |
13239 | + | |
13240 | + | |
13241 | +static mdk_personality_t translucent_personality= | |
13242 | +{ | |
13243 | + "translucent", | |
13244 | + translucent_map, | |
13245 | + NULL, | |
13246 | + NULL, | |
13247 | + translucent_run, | |
13248 | + translucent_stop, | |
13249 | + translucent_status, | |
13250 | + NULL, | |
13251 | + 0, | |
13252 | + NULL, | |
13253 | + NULL, | |
13254 | + NULL, | |
13255 | + NULL | |
13256 | +}; | |
13257 | + | |
13258 | +#ifndef MODULE | |
13259 | + | |
13260 | +md__initfunc(void translucent_init (void)) | |
13261 | +{ | |
13262 | + register_md_personality (TRANSLUCENT, &translucent_personality); | |
13263 | +} | |
13264 | + | |
13265 | +#else | |
13266 | + | |
13267 | +int init_module (void) | |
13268 | +{ | |
13269 | + return (register_md_personality (TRANSLUCENT, &translucent_personality)); | |
13270 | +} | |
13271 | + | |
13272 | +void cleanup_module (void) | |
13273 | +{ | |
13274 | + unregister_md_personality (TRANSLUCENT); | |
13275 | +} | |
13276 | + | |
13277 | +#endif | |
13278 | + | |
13279 | --- linux/drivers/block/xor.c.orig Fri Nov 23 11:18:20 2001 | |
13280 | +++ linux/drivers/block/xor.c Fri Nov 23 11:18:20 2001 | |
13281 | @@ -0,0 +1,1894 @@ | |
13282 | +/* | |
13283 | + * xor.c : Multiple Devices driver for Linux | |
13284 | + * | |
13285 | + * Copyright (C) 1996, 1997, 1998, 1999 Ingo Molnar, Matti Aarnio, Jakub Jelinek | |
13286 | + * | |
13287 | + * | |
13288 | + * optimized RAID-5 checksumming functions. | |
13289 | + * | |
13290 | + * This program is free software; you can redistribute it and/or modify | |
13291 | + * it under the terms of the GNU General Public License as published by | |
13292 | + * the Free Software Foundation; either version 2, or (at your option) | |
13293 | + * any later version. | |
13294 | + * | |
13295 | + * You should have received a copy of the GNU General Public License | |
13296 | + * (for example /usr/src/linux/COPYING); if not, write to the Free | |
13297 | + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
13298 | + */ | |
13299 | +#include <linux/module.h> | |
13300 | +#include <linux/raid/md.h> | |
13301 | +#ifdef __sparc_v9__ | |
13302 | +#include <asm/head.h> | |
13303 | +#include <asm/asi.h> | |
13304 | +#include <asm/visasm.h> | |
13305 | +#endif | |
13306 | + | |
13307 | +/* | |
13308 | + * we use the 'XOR function template' to register multiple xor | |
13309 | + * functions at runtime. The kernel measures their speed at bootup | |
13310 | + * and decides which one to use. (compile-time registration is | |
13311 | + * not enough, as certain CPU features like MMX can only be detected | |
13312 | + * at runtime) | |
13313 | + * | |
13314 | + * this architecture makes it pretty easy to add new routines | |
13315 | + * that are faster on certain CPUs, without killing other CPUs' | |
13316 | + * 'native' routines. The current routines are believed to be the | |
13317 | + * physically fastest ones on all CPUs tested, but feel free to | |
13318 | + * prove me wrong and add yet another routine =B-) | |
13319 | + * --mingo | |
13320 | + */ | |
13321 | + | |
13322 | +#define MAX_XOR_BLOCKS 5 | |
13323 | + | |
13324 | +#define XOR_ARGS (unsigned int count, struct buffer_head **bh_ptr) | |
13325 | + | |
13326 | +typedef void (*xor_block_t) XOR_ARGS; | |
13327 | +xor_block_t xor_block = NULL; | |
13328 | + | |
13329 | +#ifndef __sparc_v9__ | |
13330 | + | |
13331 | +struct xor_block_template; | |
13332 | + | |
13333 | +struct xor_block_template { | |
13334 | + char * name; | |
13335 | + xor_block_t xor_block; | |
13336 | + int speed; | |
13337 | + struct xor_block_template * next; | |
13338 | +}; | |
13339 | + | |
13340 | +struct xor_block_template * xor_functions = NULL; | |
13341 | + | |
13342 | +#define XORBLOCK_TEMPLATE(x) \ | |
13343 | +static void xor_block_##x XOR_ARGS; \ | |
13344 | +static struct xor_block_template t_xor_block_##x = \ | |
13345 | + { #x, xor_block_##x, 0, NULL }; \ | |
13346 | +static void xor_block_##x XOR_ARGS | |
13347 | + | |
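Editor's note: the XORBLOCK_TEMPLATE macro above pairs each routine with a struct xor_block_template that carries its name and a speed field for the boot-time benchmark to fill in. The user-space sketch below shows how a deliberately naive, portable routine slots into that pattern; the buffer_head stand-in and the routine name are invented, and the real templates live further down in this file.

#include <stddef.h>

struct buffer_head { size_t b_size; unsigned long *b_data; };

#define XOR_ARGS (unsigned int count, struct buffer_head **bh_ptr)
typedef void (*xor_block_t) XOR_ARGS;

struct xor_block_template {
	char *name;
	xor_block_t xor_block;
	int speed;
	struct xor_block_template *next;
};

#define XORBLOCK_TEMPLATE(x) \
static void xor_block_##x XOR_ARGS; \
static struct xor_block_template t_xor_block_##x = \
	{ #x, xor_block_##x, 0, NULL }; \
static void xor_block_##x XOR_ARGS

/* naive word-wise routine: xor sources 1..count-1 into bh_ptr[0] */
XORBLOCK_TEMPLATE(sketch)
{
	size_t words = bh_ptr[0]->b_size / sizeof(unsigned long);
	unsigned int i;
	size_t w;

	for (i = 1; i < count; i++)
		for (w = 0; w < words; w++)
			bh_ptr[0]->b_data[w] ^= bh_ptr[i]->b_data[w];
}

int main(void)
{
	unsigned long a[2] = { 0xffUL, 0x0fUL }, b[2] = { 0x0fUL, 0xffUL };
	struct buffer_head d = { sizeof(a), a }, s = { sizeof(b), b };
	struct buffer_head *bhs[2] = { &d, &s };

	xor_block_sketch(2, bhs);	/* a[] now holds a[] ^ b[] */
	(void)t_xor_block_sketch;	/* would be linked into xor_functions at boot */
	return 0;
}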
13348 | +#ifdef __i386__ | |
13349 | + | |
13350 | +#ifdef CONFIG_X86_XMM | |
13351 | +/* | |
13352 | + * Cache avoiding checksumming functions utilizing KNI instructions | |
13353 | + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) | |
13354 | + */ | |
13355 | + | |
13356 | +XORBLOCK_TEMPLATE(pIII_kni) | |
13357 | +{ | |
13358 | + char xmm_save[16*4]; | |
13359 | + int cr0; | |
13360 | + int lines = (bh_ptr[0]->b_size>>8); | |
13361 | + | |
13362 | + __asm__ __volatile__ ( | |
13363 | + "movl %%cr0,%0 ;\n\t" | |
13364 | + "clts ;\n\t" | |
13365 | + "movups %%xmm0,(%1) ;\n\t" | |
13366 | + "movups %%xmm1,0x10(%1) ;\n\t" | |
13367 | + "movups %%xmm2,0x20(%1) ;\n\t" | |
13368 | + "movups %%xmm3,0x30(%1) ;\n\t" | |
13369 | + : "=r" (cr0) | |
13370 | + : "r" (xmm_save) | |
13371 | + : "memory" ); | |
13372 | + | |
13373 | +#define OFFS(x) "8*("#x"*2)" | |
13374 | +#define PF0(x) \ | |
13375 | + " prefetcht0 "OFFS(x)"(%1) ;\n" | |
13376 | +#define LD(x,y) \ | |
13377 | + " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" | |
13378 | +#define ST(x,y) \ | |
13379 | + " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" | |
13380 | +#define PF1(x) \ | |
13381 | + " prefetchnta "OFFS(x)"(%2) ;\n" | |
13382 | +#define PF2(x) \ | |
13383 | + " prefetchnta "OFFS(x)"(%3) ;\n" | |
13384 | +#define PF3(x) \ | |
13385 | + " prefetchnta "OFFS(x)"(%4) ;\n" | |
13386 | +#define PF4(x) \ | |
13387 | + " prefetchnta "OFFS(x)"(%5) ;\n" | |
13388 | +#define PF5(x) \ | |
13389 | + " prefetchnta "OFFS(x)"(%6) ;\n" | |
13390 | +#define XO1(x,y) \ | |
13391 | + " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" | |
13392 | +#define XO2(x,y) \ | |
13393 | + " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" | |
13394 | +#define XO3(x,y) \ | |
13395 | + " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" | |
13396 | +#define XO4(x,y) \ | |
13397 | + " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" | |
13398 | +#define XO5(x,y) \ | |
13399 | + " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" | |
13400 | + | |
13401 | + switch(count) { | |
13402 | + case 2: | |
13403 | + __asm__ __volatile__ ( | |
13404 | +#undef BLOCK | |
13405 | +#define BLOCK(i) \ | |
13406 | + LD(i,0) \ | |
13407 | + LD(i+1,1) \ | |
13408 | + PF1(i) \ | |
13409 | + PF1(i+2) \ | |
13410 | + LD(i+2,2) \ | |
13411 | + LD(i+3,3) \ | |
13412 | + PF0(i+4) \ | |
13413 | + PF0(i+6) \ | |
13414 | + XO1(i,0) \ | |
13415 | + XO1(i+1,1) \ | |
13416 | + XO1(i+2,2) \ | |
13417 | + XO1(i+3,3) \ | |
13418 | + ST(i,0) \ | |
13419 | + ST(i+1,1) \ | |
13420 | + ST(i+2,2) \ | |
13421 | + ST(i+3,3) \ | |
13422 | + | |
13423 | + | |
13424 | + PF0(0) | |
13425 | + PF0(2) | |
13426 | + | |
13427 | + " .align 32,0x90 ;\n" | |
13428 | + " 1: ;\n" | |
13429 | + | |
13430 | + BLOCK(0) | |
13431 | + BLOCK(4) | |
13432 | + BLOCK(8) | |
13433 | + BLOCK(12) | |
13434 | + | |
13435 | + " addl $256, %1 ;\n" | |
13436 | + " addl $256, %2 ;\n" | |
13437 | + " decl %0 ;\n" | |
13438 | + " jnz 1b ;\n" | |
13439 | + | |
13440 | + : | |
13441 | + : "r" (lines), | |
13442 | + "r" (bh_ptr[0]->b_data), | |
13443 | + "r" (bh_ptr[1]->b_data) | |
13444 | + : "memory" ); | |
13445 | + break; | |
13446 | + case 3: | |
13447 | + __asm__ __volatile__ ( | |
13448 | +#undef BLOCK | |
13449 | +#define BLOCK(i) \ | |
13450 | + PF1(i) \ | |
13451 | + PF1(i+2) \ | |
13452 | + LD(i,0) \ | |
13453 | + LD(i+1,1) \ | |
13454 | + LD(i+2,2) \ | |
13455 | + LD(i+3,3) \ | |
13456 | + PF2(i) \ | |
13457 | + PF2(i+2) \ | |
13458 | + PF0(i+4) \ | |
13459 | + PF0(i+6) \ | |
13460 | + XO1(i,0) \ | |
13461 | + XO1(i+1,1) \ | |
13462 | + XO1(i+2,2) \ | |
13463 | + XO1(i+3,3) \ | |
13464 | + XO2(i,0) \ | |
13465 | + XO2(i+1,1) \ | |
13466 | + XO2(i+2,2) \ | |
13467 | + XO2(i+3,3) \ | |
13468 | + ST(i,0) \ | |
13469 | + ST(i+1,1) \ | |
13470 | + ST(i+2,2) \ | |
13471 | + ST(i+3,3) \ | |
13472 | + | |
13473 | + | |
13474 | + PF0(0) | |
13475 | + PF0(2) | |
13476 | + | |
13477 | + " .align 32,0x90 ;\n" | |
13478 | + " 1: ;\n" | |
13479 | + | |
13480 | + BLOCK(0) | |
13481 | + BLOCK(4) | |
13482 | + BLOCK(8) | |
13483 | + BLOCK(12) | |
13484 | + | |
13485 | + " addl $256, %1 ;\n" | |
13486 | + " addl $256, %2 ;\n" | |
13487 | + " addl $256, %3 ;\n" | |
13488 | + " decl %0 ;\n" | |
13489 | + " jnz 1b ;\n" | |
13490 | + : | |
13491 | + : "r" (lines), | |
13492 | + "r" (bh_ptr[0]->b_data), | |
13493 | + "r" (bh_ptr[1]->b_data), | |
13494 | + "r" (bh_ptr[2]->b_data) | |
13495 | + : "memory" ); | |
13496 | + break; | |
13497 | + case 4: | |
13498 | + __asm__ __volatile__ ( | |
13499 | +#undef BLOCK | |
13500 | +#define BLOCK(i) \ | |
13501 | + PF1(i) \ | |
13502 | + PF1(i+2) \ | |
13503 | + LD(i,0) \ | |
13504 | + LD(i+1,1) \ | |
13505 | + LD(i+2,2) \ | |
13506 | + LD(i+3,3) \ | |
13507 | + PF2(i) \ | |
13508 | + PF2(i+2) \ | |
13509 | + XO1(i,0) \ | |
13510 | + XO1(i+1,1) \ | |
13511 | + XO1(i+2,2) \ | |
13512 | + XO1(i+3,3) \ | |
13513 | + PF3(i) \ | |
13514 | + PF3(i+2) \ | |
13515 | + PF0(i+4) \ | |
13516 | + PF0(i+6) \ | |
13517 | + XO2(i,0) \ | |
13518 | + XO2(i+1,1) \ | |
13519 | + XO2(i+2,2) \ | |
13520 | + XO2(i+3,3) \ | |
13521 | + XO3(i,0) \ | |
13522 | + XO3(i+1,1) \ | |
13523 | + XO3(i+2,2) \ | |
13524 | + XO3(i+3,3) \ | |
13525 | + ST(i,0) \ | |
13526 | + ST(i+1,1) \ | |
13527 | + ST(i+2,2) \ | |
13528 | + ST(i+3,3) \ | |
13529 | + | |
13530 | + | |
13531 | + PF0(0) | |
13532 | + PF0(2) | |
13533 | + | |
13534 | + " .align 32,0x90 ;\n" | |
13535 | + " 1: ;\n" | |
13536 | + | |
13537 | + BLOCK(0) | |
13538 | + BLOCK(4) | |
13539 | + BLOCK(8) | |
13540 | + BLOCK(12) | |
13541 | + | |
13542 | + " addl $256, %1 ;\n" | |
13543 | + " addl $256, %2 ;\n" | |
13544 | + " addl $256, %3 ;\n" | |
13545 | + " addl $256, %4 ;\n" | |
13546 | + " decl %0 ;\n" | |
13547 | + " jnz 1b ;\n" | |
13548 | + | |
13549 | + : | |
13550 | + : "r" (lines), | |
13551 | + "r" (bh_ptr[0]->b_data), | |
13552 | + "r" (bh_ptr[1]->b_data), | |
13553 | + "r" (bh_ptr[2]->b_data), | |
13554 | + "r" (bh_ptr[3]->b_data) | |
13555 | + : "memory" ); | |
13556 | + break; | |
13557 | + case 5: | |
13558 | + __asm__ __volatile__ ( | |
13559 | +#undef BLOCK | |
13560 | +#define BLOCK(i) \ | |
13561 | + PF1(i) \ | |
13562 | + PF1(i+2) \ | |
13563 | + LD(i,0) \ | |
13564 | + LD(i+1,1) \ | |
13565 | + LD(i+2,2) \ | |
13566 | + LD(i+3,3) \ | |
13567 | + PF2(i) \ | |
13568 | + PF2(i+2) \ | |
13569 | + XO1(i,0) \ | |
13570 | + XO1(i+1,1) \ | |
13571 | + XO1(i+2,2) \ | |
13572 | + XO1(i+3,3) \ | |
13573 | + PF3(i) \ | |
13574 | + PF3(i+2) \ | |
13575 | + XO2(i,0) \ | |
13576 | + XO2(i+1,1) \ | |
13577 | + XO2(i+2,2) \ | |
13578 | + XO2(i+3,3) \ | |
13579 | + PF4(i) \ | |
13580 | + PF4(i+2) \ | |
13581 | + PF0(i+4) \ | |
13582 | + PF0(i+6) \ | |
13583 | + XO3(i,0) \ | |
13584 | + XO3(i+1,1) \ | |
13585 | + XO3(i+2,2) \ | |
13586 | + XO3(i+3,3) \ | |
13587 | + XO4(i,0) \ | |
13588 | + XO4(i+1,1) \ | |
13589 | + XO4(i+2,2) \ | |
13590 | + XO4(i+3,3) \ | |
13591 | + ST(i,0) \ | |
13592 | + ST(i+1,1) \ | |
13593 | + ST(i+2,2) \ | |
13594 | + ST(i+3,3) \ | |
13595 | + | |
13596 | + | |
13597 | + PF0(0) | |
13598 | + PF0(2) | |
13599 | + | |
13600 | + " .align 32,0x90 ;\n" | |
13601 | + " 1: ;\n" | |
13602 | + | |
13603 | + BLOCK(0) | |
13604 | + BLOCK(4) | |
13605 | + BLOCK(8) | |
13606 | + BLOCK(12) | |
13607 | + | |
13608 | + " addl $256, %1 ;\n" | |
13609 | + " addl $256, %2 ;\n" | |
13610 | + " addl $256, %3 ;\n" | |
13611 | + " addl $256, %4 ;\n" | |
13612 | + " addl $256, %5 ;\n" | |
13613 | + " decl %0 ;\n" | |
13614 | + " jnz 1b ;\n" | |
13615 | + | |
13616 | + : | |
13617 | + : "r" (lines), | |
13618 | + "r" (bh_ptr[0]->b_data), | |
13619 | + "r" (bh_ptr[1]->b_data), | |
13620 | + "r" (bh_ptr[2]->b_data), | |
13621 | + "r" (bh_ptr[3]->b_data), | |
13622 | + "r" (bh_ptr[4]->b_data) | |
13623 | + : "memory"); | |
13624 | + break; | |
13625 | + } | |
13626 | + | |
13627 | + __asm__ __volatile__ ( | |
13628 | + "sfence ;\n\t" | |
13629 | + "movups (%1),%%xmm0 ;\n\t" | |
13630 | + "movups 0x10(%1),%%xmm1 ;\n\t" | |
13631 | + "movups 0x20(%1),%%xmm2 ;\n\t" | |
13632 | + "movups 0x30(%1),%%xmm3 ;\n\t" | |
13633 | + "movl %0,%%cr0 ;\n\t" | |
13634 | + : | |
13635 | + : "r" (cr0), "r" (xmm_save) | |
13636 | + : "memory" ); | |
13637 | +} | |
13638 | + | |
13639 | +#undef OFFS | |
13640 | +#undef LD | |
13641 | +#undef ST | |
13642 | +#undef PF0 | |
13643 | +#undef PF1 | |
13644 | +#undef PF2 | |
13645 | +#undef PF3 | |
13646 | +#undef PF4 | |
13647 | +#undef PF5 | |
13648 | +#undef XO1 | |
13649 | +#undef XO2 | |
13650 | +#undef XO3 | |
13651 | +#undef XO4 | |
13652 | +#undef XO5 | |
13653 | +#undef BLOCK | |
13654 | + | |
13655 | +#endif /* CONFIG_X86_XMM */ | |
13656 | + | |
13657 | +/* | |
13658 | + * high-speed RAID5 checksumming functions utilizing MMX instructions | |
13659 | + * Copyright (C) 1998 Ingo Molnar | |
13660 | + */ | |
13661 | +XORBLOCK_TEMPLATE(pII_mmx) | |
13662 | +{ | |
13663 | + char fpu_save[108]; | |
13664 | + int lines = (bh_ptr[0]->b_size>>7); | |
13665 | + | |
13666 | + if (!(current->flags & PF_USEDFPU)) | |
13667 | + __asm__ __volatile__ ( " clts;\n"); | |
13668 | + | |
13669 | + __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); | |
13670 | + | |
13671 | +#define LD(x,y) \ | |
13672 | + " movq 8*("#x")(%1), %%mm"#y" ;\n" | |
13673 | +#define ST(x,y) \ | |
13674 | + " movq %%mm"#y", 8*("#x")(%1) ;\n" | |
13675 | +#define XO1(x,y) \ | |
13676 | + " pxor 8*("#x")(%2), %%mm"#y" ;\n" | |
13677 | +#define XO2(x,y) \ | |
13678 | + " pxor 8*("#x")(%3), %%mm"#y" ;\n" | |
13679 | +#define XO3(x,y) \ | |
13680 | + " pxor 8*("#x")(%4), %%mm"#y" ;\n" | |
13681 | +#define XO4(x,y) \ | |
13682 | + " pxor 8*("#x")(%5), %%mm"#y" ;\n" | |
13683 | + | |
13684 | + switch(count) { | |
13685 | + case 2: | |
13686 | + __asm__ __volatile__ ( | |
13687 | +#undef BLOCK | |
13688 | +#define BLOCK(i) \ | |
13689 | + LD(i,0) \ | |
13690 | + LD(i+1,1) \ | |
13691 | + LD(i+2,2) \ | |
13692 | + LD(i+3,3) \ | |
13693 | + XO1(i,0) \ | |
13694 | + ST(i,0) \ | |
13695 | + XO1(i+1,1) \ | |
13696 | + ST(i+1,1) \ | |
13697 | + XO1(i+2,2) \ | |
13698 | + ST(i+2,2) \ | |
13699 | + XO1(i+3,3) \ | |
13700 | + ST(i+3,3) | |
13701 | + | |
13702 | + " .align 32,0x90 ;\n" | |
13703 | + " 1: ;\n" | |
13704 | + | |
13705 | + BLOCK(0) | |
13706 | + BLOCK(4) | |
13707 | + BLOCK(8) | |
13708 | + BLOCK(12) | |
13709 | + | |
13710 | + " addl $128, %1 ;\n" | |
13711 | + " addl $128, %2 ;\n" | |
13712 | + " decl %0 ;\n" | |
13713 | + " jnz 1b ;\n" | |
13714 | + : | |
13715 | + : "r" (lines), | |
13716 | + "r" (bh_ptr[0]->b_data), | |
13717 | + "r" (bh_ptr[1]->b_data) | |
13718 | + : "memory"); | |
13719 | + break; | |
13720 | + case 3: | |
13721 | + __asm__ __volatile__ ( | |
13722 | +#undef BLOCK | |
13723 | +#define BLOCK(i) \ | |
13724 | + LD(i,0) \ | |
13725 | + LD(i+1,1) \ | |
13726 | + LD(i+2,2) \ | |
13727 | + LD(i+3,3) \ | |
13728 | + XO1(i,0) \ | |
13729 | + XO1(i+1,1) \ | |
13730 | + XO1(i+2,2) \ | |
13731 | + XO1(i+3,3) \ | |
13732 | + XO2(i,0) \ | |
13733 | + ST(i,0) \ | |
13734 | + XO2(i+1,1) \ | |
13735 | + ST(i+1,1) \ | |
13736 | + XO2(i+2,2) \ | |
13737 | + ST(i+2,2) \ | |
13738 | + XO2(i+3,3) \ | |
13739 | + ST(i+3,3) | |
13740 | + | |
13741 | + " .align 32,0x90 ;\n" | |
13742 | + " 1: ;\n" | |
13743 | + | |
13744 | + BLOCK(0) | |
13745 | + BLOCK(4) | |
13746 | + BLOCK(8) | |
13747 | + BLOCK(12) | |
13748 | + | |
13749 | + " addl $128, %1 ;\n" | |
13750 | + " addl $128, %2 ;\n" | |
13751 | + " addl $128, %3 ;\n" | |
13752 | + " decl %0 ;\n" | |
13753 | + " jnz 1b ;\n" | |
13754 | + : | |
13755 | + : "r" (lines), | |
13756 | + "r" (bh_ptr[0]->b_data), | |
13757 | + "r" (bh_ptr[1]->b_data), | |
13758 | + "r" (bh_ptr[2]->b_data) | |
13759 | + : "memory"); | |
13760 | + break; | |
13761 | + case 4: | |
13762 | + __asm__ __volatile__ ( | |
13763 | +#undef BLOCK | |
13764 | +#define BLOCK(i) \ | |
13765 | + LD(i,0) \ | |
13766 | + LD(i+1,1) \ | |
13767 | + LD(i+2,2) \ | |
13768 | + LD(i+3,3) \ | |
13769 | + XO1(i,0) \ | |
13770 | + XO1(i+1,1) \ | |
13771 | + XO1(i+2,2) \ | |
13772 | + XO1(i+3,3) \ | |
13773 | + XO2(i,0) \ | |
13774 | + XO2(i+1,1) \ | |
13775 | + XO2(i+2,2) \ | |
13776 | + XO2(i+3,3) \ | |
13777 | + XO3(i,0) \ | |
13778 | + ST(i,0) \ | |
13779 | + XO3(i+1,1) \ | |
13780 | + ST(i+1,1) \ | |
13781 | + XO3(i+2,2) \ | |
13782 | + ST(i+2,2) \ | |
13783 | + XO3(i+3,3) \ | |
13784 | + ST(i+3,3) | |
13785 | + | |
13786 | + " .align 32,0x90 ;\n" | |
13787 | + " 1: ;\n" | |
13788 | + | |
13789 | + BLOCK(0) | |
13790 | + BLOCK(4) | |
13791 | + BLOCK(8) | |
13792 | + BLOCK(12) | |
13793 | + | |
13794 | + " addl $128, %1 ;\n" | |
13795 | + " addl $128, %2 ;\n" | |
13796 | + " addl $128, %3 ;\n" | |
13797 | + " addl $128, %4 ;\n" | |
13798 | + " decl %0 ;\n" | |
13799 | + " jnz 1b ;\n" | |
13800 | + : | |
13801 | + : "r" (lines), | |
13802 | + "r" (bh_ptr[0]->b_data), | |
13803 | + "r" (bh_ptr[1]->b_data), | |
13804 | + "r" (bh_ptr[2]->b_data), | |
13805 | + "r" (bh_ptr[3]->b_data) | |
13806 | + : "memory"); | |
13807 | + break; | |
13808 | + case 5: | |
13809 | + __asm__ __volatile__ ( | |
13810 | +#undef BLOCK | |
13811 | +#define BLOCK(i) \ | |
13812 | + LD(i,0) \ | |
13813 | + LD(i+1,1) \ | |
13814 | + LD(i+2,2) \ | |
13815 | + LD(i+3,3) \ | |
13816 | + XO1(i,0) \ | |
13817 | + XO1(i+1,1) \ | |
13818 | + XO1(i+2,2) \ | |
13819 | + XO1(i+3,3) \ | |
13820 | + XO2(i,0) \ | |
13821 | + XO2(i+1,1) \ | |
13822 | + XO2(i+2,2) \ | |
13823 | + XO2(i+3,3) \ | |
13824 | + XO3(i,0) \ | |
13825 | + XO3(i+1,1) \ | |
13826 | + XO3(i+2,2) \ | |
13827 | + XO3(i+3,3) \ | |
13828 | + XO4(i,0) \ | |
13829 | + ST(i,0) \ | |
13830 | + XO4(i+1,1) \ | |
13831 | + ST(i+1,1) \ | |
13832 | + XO4(i+2,2) \ | |
13833 | + ST(i+2,2) \ | |
13834 | + XO4(i+3,3) \ | |
13835 | + ST(i+3,3) | |
13836 | + | |
13837 | + " .align 32,0x90 ;\n" | |
13838 | + " 1: ;\n" | |
13839 | + | |
13840 | + BLOCK(0) | |
13841 | + BLOCK(4) | |
13842 | + BLOCK(8) | |
13843 | + BLOCK(12) | |
13844 | + | |
13845 | + " addl $128, %1 ;\n" | |
13846 | + " addl $128, %2 ;\n" | |
13847 | + " addl $128, %3 ;\n" | |
13848 | + " addl $128, %4 ;\n" | |
13849 | + " addl $128, %5 ;\n" | |
13850 | + " decl %0 ;\n" | |
13851 | + " jnz 1b ;\n" | |
13852 | + : | |
13853 | + : "r" (lines), | |
13854 | + "r" (bh_ptr[0]->b_data), | |
13855 | + "r" (bh_ptr[1]->b_data), | |
13856 | + "r" (bh_ptr[2]->b_data), | |
13857 | + "r" (bh_ptr[3]->b_data), | |
13858 | + "r" (bh_ptr[4]->b_data) | |
13859 | + : "memory"); | |
13860 | + break; | |
13861 | + } | |
13862 | + | |
13863 | + __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); | |
13864 | + | |
13865 | + if (!(current->flags & PF_USEDFPU)) | |
13866 | + stts(); | |
13867 | +} | |
13868 | + | |
13869 | +#undef LD | |
13870 | +#undef XO1 | |
13871 | +#undef XO2 | |
13872 | +#undef XO3 | |
13873 | +#undef XO4 | |
13874 | +#undef ST | |
13875 | +#undef BLOCK | |
13876 | + | |
13877 | +XORBLOCK_TEMPLATE(p5_mmx) | |
13878 | +{ | |
13879 | + char fpu_save[108]; | |
13880 | + int lines = (bh_ptr[0]->b_size>>6); | |
13881 | + | |
13882 | + if (!(current->flags & PF_USEDFPU)) | |
13883 | + __asm__ __volatile__ ( " clts;\n"); | |
13884 | + | |
13885 | + __asm__ __volatile__ ( " fsave %0; fwait\n"::"m"(fpu_save[0]) ); | |
13886 | + | |
13887 | + switch(count) { | |
13888 | + case 2: | |
13889 | + __asm__ __volatile__ ( | |
13890 | + | |
13891 | + " .align 32,0x90 ;\n" | |
13892 | + " 1: ;\n" | |
13893 | + " movq (%1), %%mm0 ;\n" | |
13894 | + " movq 8(%1), %%mm1 ;\n" | |
13895 | + " pxor (%2), %%mm0 ;\n" | |
13896 | + " movq 16(%1), %%mm2 ;\n" | |
13897 | + " movq %%mm0, (%1) ;\n" | |
13898 | + " pxor 8(%2), %%mm1 ;\n" | |
13899 | + " movq 24(%1), %%mm3 ;\n" | |
13900 | + " movq %%mm1, 8(%1) ;\n" | |
13901 | + " pxor 16(%2), %%mm2 ;\n" | |
13902 | + " movq 32(%1), %%mm4 ;\n" | |
13903 | + " movq %%mm2, 16(%1) ;\n" | |
13904 | + " pxor 24(%2), %%mm3 ;\n" | |
13905 | + " movq 40(%1), %%mm5 ;\n" | |
13906 | + " movq %%mm3, 24(%1) ;\n" | |
13907 | + " pxor 32(%2), %%mm4 ;\n" | |
13908 | + " movq 48(%1), %%mm6 ;\n" | |
13909 | + " movq %%mm4, 32(%1) ;\n" | |
13910 | + " pxor 40(%2), %%mm5 ;\n" | |
13911 | + " movq 56(%1), %%mm7 ;\n" | |
13912 | + " movq %%mm5, 40(%1) ;\n" | |
13913 | + " pxor 48(%2), %%mm6 ;\n" | |
13914 | + " pxor 56(%2), %%mm7 ;\n" | |
13915 | + " movq %%mm6, 48(%1) ;\n" | |
13916 | + " movq %%mm7, 56(%1) ;\n" | |
13917 | + | |
13918 | + " addl $64, %1 ;\n" | |
13919 | + " addl $64, %2 ;\n" | |
13920 | + " decl %0 ;\n" | |
13921 | + " jnz 1b ;\n" | |
13922 | + | |
13923 | + : | |
13924 | + : "r" (lines), | |
13925 | + "r" (bh_ptr[0]->b_data), | |
13926 | + "r" (bh_ptr[1]->b_data) | |
13927 | + : "memory" ); | |
13928 | + break; | |
13929 | + case 3: | |
13930 | + __asm__ __volatile__ ( | |
13931 | + | |
13932 | + " .align 32,0x90 ;\n" | |
13933 | + " 1: ;\n" | |
13934 | + " movq (%1), %%mm0 ;\n" | |
13935 | + " movq 8(%1), %%mm1 ;\n" | |
13936 | + " pxor (%2), %%mm0 ;\n" | |
13937 | + " movq 16(%1), %%mm2 ;\n" | |
13938 | + " pxor 8(%2), %%mm1 ;\n" | |
13939 | + " pxor (%3), %%mm0 ;\n" | |
13940 | + " pxor 16(%2), %%mm2 ;\n" | |
13941 | + " movq %%mm0, (%1) ;\n" | |
13942 | + " pxor 8(%3), %%mm1 ;\n" | |
13943 | + " pxor 16(%3), %%mm2 ;\n" | |
13944 | + " movq 24(%1), %%mm3 ;\n" | |
13945 | + " movq %%mm1, 8(%1) ;\n" | |
13946 | + " movq 32(%1), %%mm4 ;\n" | |
13947 | + " movq 40(%1), %%mm5 ;\n" | |
13948 | + " pxor 24(%2), %%mm3 ;\n" | |
13949 | + " movq %%mm2, 16(%1) ;\n" | |
13950 | + " pxor 32(%2), %%mm4 ;\n" | |
13951 | + " pxor 24(%3), %%mm3 ;\n" | |
13952 | + " pxor 40(%2), %%mm5 ;\n" | |
13953 | + " movq %%mm3, 24(%1) ;\n" | |
13954 | + " pxor 32(%3), %%mm4 ;\n" | |
13955 | + " pxor 40(%3), %%mm5 ;\n" | |
13956 | + " movq 48(%1), %%mm6 ;\n" | |
13957 | + " movq %%mm4, 32(%1) ;\n" | |
13958 | + " movq 56(%1), %%mm7 ;\n" | |
13959 | + " pxor 48(%2), %%mm6 ;\n" | |
13960 | + " movq %%mm5, 40(%1) ;\n" | |
13961 | + " pxor 56(%2), %%mm7 ;\n" | |
13962 | + " pxor 48(%3), %%mm6 ;\n" | |
13963 | + " pxor 56(%3), %%mm7 ;\n" | |
13964 | + " movq %%mm6, 48(%1) ;\n" | |
13965 | + " movq %%mm7, 56(%1) ;\n" | |
13966 | + | |
13967 | + " addl $64, %1 ;\n" | |
13968 | + " addl $64, %2 ;\n" | |
13969 | + " addl $64, %3 ;\n" | |
13970 | + " decl %0 ;\n" | |
13971 | + " jnz 1b ;\n" | |
13972 | + | |
13973 | + : | |
13974 | + : "r" (lines), | |
13975 | + "r" (bh_ptr[0]->b_data), | |
13976 | + "r" (bh_ptr[1]->b_data), | |
13977 | + "r" (bh_ptr[2]->b_data) | |
13978 | + : "memory" ); | |
13979 | + break; | |
13980 | + case 4: | |
13981 | + __asm__ __volatile__ ( | |
13982 | + | |
13983 | + " .align 32,0x90 ;\n" | |
13984 | + " 1: ;\n" | |
13985 | + " movq (%1), %%mm0 ;\n" | |
13986 | + " movq 8(%1), %%mm1 ;\n" | |
13987 | + " pxor (%2), %%mm0 ;\n" | |
13988 | + " movq 16(%1), %%mm2 ;\n" | |
13989 | + " pxor 8(%2), %%mm1 ;\n" | |
13990 | + " pxor (%3), %%mm0 ;\n" | |
13991 | + " pxor 16(%2), %%mm2 ;\n" | |
13992 | + " pxor 8(%3), %%mm1 ;\n" | |
13993 | + " pxor (%4), %%mm0 ;\n" | |
13994 | + " movq 24(%1), %%mm3 ;\n" | |
13995 | + " pxor 16(%3), %%mm2 ;\n" | |
13996 | + " pxor 8(%4), %%mm1 ;\n" | |
13997 | + " movq %%mm0, (%1) ;\n" | |
13998 | + " movq 32(%1), %%mm4 ;\n" | |
13999 | + " pxor 24(%2), %%mm3 ;\n" | |
14000 | + " pxor 16(%4), %%mm2 ;\n" | |
14001 | + " movq %%mm1, 8(%1) ;\n" | |
14002 | + " movq 40(%1), %%mm5 ;\n" | |
14003 | + " pxor 32(%2), %%mm4 ;\n" | |
14004 | + " pxor 24(%3), %%mm3 ;\n" | |
14005 | + " movq %%mm2, 16(%1) ;\n" | |
14006 | + " pxor 40(%2), %%mm5 ;\n" | |
14007 | + " pxor 32(%3), %%mm4 ;\n" | |
14008 | + " pxor 24(%4), %%mm3 ;\n" | |
14009 | + " movq %%mm3, 24(%1) ;\n" | |
14010 | + " movq 56(%1), %%mm7 ;\n" | |
14011 | + " movq 48(%1), %%mm6 ;\n" | |
14012 | + " pxor 40(%3), %%mm5 ;\n" | |
14013 | + " pxor 32(%4), %%mm4 ;\n" | |
14014 | + " pxor 48(%2), %%mm6 ;\n" | |
14015 | + " movq %%mm4, 32(%1) ;\n" | |
14016 | + " pxor 56(%2), %%mm7 ;\n" | |
14017 | + " pxor 40(%4), %%mm5 ;\n" | |
14018 | + " pxor 48(%3), %%mm6 ;\n" | |
14019 | + " pxor 56(%3), %%mm7 ;\n" | |
14020 | + " movq %%mm5, 40(%1) ;\n" | |
14021 | + " pxor 48(%4), %%mm6 ;\n" | |
14022 | + " pxor 56(%4), %%mm7 ;\n" | |
14023 | + " movq %%mm6, 48(%1) ;\n" | |
14024 | + " movq %%mm7, 56(%1) ;\n" | |
14025 | + | |
14026 | + " addl $64, %1 ;\n" | |
14027 | + " addl $64, %2 ;\n" | |
14028 | + " addl $64, %3 ;\n" | |
14029 | + " addl $64, %4 ;\n" | |
14030 | + " decl %0 ;\n" | |
14031 | + " jnz 1b ;\n" | |
14032 | + | |
14033 | + : | |
14034 | + : "r" (lines), | |
14035 | + "r" (bh_ptr[0]->b_data), | |
14036 | + "r" (bh_ptr[1]->b_data), | |
14037 | + "r" (bh_ptr[2]->b_data), | |
14038 | + "r" (bh_ptr[3]->b_data) | |
14039 | + : "memory" ); | |
14040 | + break; | |
14041 | + case 5: | |
14042 | + __asm__ __volatile__ ( | |
14043 | + | |
14044 | + " .align 32,0x90 ;\n" | |
14045 | + " 1: ;\n" | |
14046 | + " movq (%1), %%mm0 ;\n" | |
14047 | + " movq 8(%1), %%mm1 ;\n" | |
14048 | + " pxor (%2), %%mm0 ;\n" | |
14049 | + " pxor 8(%2), %%mm1 ;\n" | |
14050 | + " movq 16(%1), %%mm2 ;\n" | |
14051 | + " pxor (%3), %%mm0 ;\n" | |
14052 | + " pxor 8(%3), %%mm1 ;\n" | |
14053 | + " pxor 16(%2), %%mm2 ;\n" | |
14054 | + " pxor (%4), %%mm0 ;\n" | |
14055 | + " pxor 8(%4), %%mm1 ;\n" | |
14056 | + " pxor 16(%3), %%mm2 ;\n" | |
14057 | + " movq 24(%1), %%mm3 ;\n" | |
14058 | + " pxor (%5), %%mm0 ;\n" | |
14059 | + " pxor 8(%5), %%mm1 ;\n" | |
14060 | + " movq %%mm0, (%1) ;\n" | |
14061 | + " pxor 16(%4), %%mm2 ;\n" | |
14062 | + " pxor 24(%2), %%mm3 ;\n" | |
14063 | + " movq %%mm1, 8(%1) ;\n" | |
14064 | + " pxor 16(%5), %%mm2 ;\n" | |
14065 | + " pxor 24(%3), %%mm3 ;\n" | |
14066 | + " movq 32(%1), %%mm4 ;\n" | |
14067 | + " movq %%mm2, 16(%1) ;\n" | |
14068 | + " pxor 24(%4), %%mm3 ;\n" | |
14069 | + " pxor 32(%2), %%mm4 ;\n" | |
14070 | + " movq 40(%1), %%mm5 ;\n" | |
14071 | + " pxor 24(%5), %%mm3 ;\n" | |
14072 | + " pxor 32(%3), %%mm4 ;\n" | |
14073 | + " pxor 40(%2), %%mm5 ;\n" | |
14074 | + " movq %%mm3, 24(%1) ;\n" | |
14075 | + " pxor 32(%4), %%mm4 ;\n" | |
14076 | + " pxor 40(%3), %%mm5 ;\n" | |
14077 | + " movq 48(%1), %%mm6 ;\n" | |
14078 | + " movq 56(%1), %%mm7 ;\n" | |
14079 | + " pxor 32(%5), %%mm4 ;\n" | |
14080 | + " pxor 40(%4), %%mm5 ;\n" | |
14081 | + " pxor 48(%2), %%mm6 ;\n" | |
14082 | + " pxor 56(%2), %%mm7 ;\n" | |
14083 | + " movq %%mm4, 32(%1) ;\n" | |
14084 | + " pxor 48(%3), %%mm6 ;\n" | |
14085 | + " pxor 56(%3), %%mm7 ;\n" | |
14086 | + " pxor 40(%5), %%mm5 ;\n" | |
14087 | + " pxor 48(%4), %%mm6 ;\n" | |
14088 | + " pxor 56(%4), %%mm7 ;\n" | |
14089 | + " movq %%mm5, 40(%1) ;\n" | |
14090 | + " pxor 48(%5), %%mm6 ;\n" | |
14091 | + " pxor 56(%5), %%mm7 ;\n" | |
14092 | + " movq %%mm6, 48(%1) ;\n" | |
14093 | + " movq %%mm7, 56(%1) ;\n" | |
14094 | + | |
14095 | + " addl $64, %1 ;\n" | |
14096 | + " addl $64, %2 ;\n" | |
14097 | + " addl $64, %3 ;\n" | |
14098 | + " addl $64, %4 ;\n" | |
14099 | + " addl $64, %5 ;\n" | |
14100 | + " decl %0 ;\n" | |
14101 | + " jnz 1b ;\n" | |
14102 | + | |
14103 | + : | |
14104 | + : "r" (lines), | |
14105 | + "r" (bh_ptr[0]->b_data), | |
14106 | + "r" (bh_ptr[1]->b_data), | |
14107 | + "r" (bh_ptr[2]->b_data), | |
14108 | + "r" (bh_ptr[3]->b_data), | |
14109 | + "r" (bh_ptr[4]->b_data) | |
14110 | + : "memory" ); | |
14111 | + break; | |
14112 | + } | |
14113 | + | |
14114 | + __asm__ __volatile__ ( " frstor %0;\n"::"m"(fpu_save[0]) ); | |
14115 | + | |
14116 | + if (!(current->flags & PF_USEDFPU)) | |
14117 | + stts(); | |
14118 | +} | |
14119 | +#endif /* __i386__ */ | |
14120 | +#endif /* !__sparc_v9__ */ | |
14121 | + | |
14122 | +#ifdef __sparc_v9__ | |
14123 | +/* | |
14124 | + * High speed xor_block operation for RAID4/5 utilizing the | |
14125 | + * UltraSparc Visual Instruction Set. | |
14126 | + * | |
14127 | + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) | |
14128 | + * | |
14129 | + * Requirements: | |
14130 | + * !(((long)dest | (long)sourceN) & (64 - 1)) && | |
14131 | + * !(len & 127) && len >= 256 | |
14132 | + * | |
14133 | + * It is done in pure assembly, as otherwise gcc makes it | |
14134 | + * a non-leaf function, which is not what we want. | |
14135 | + * Also, we don't measure the speeds as on other architectures, | |
14136 | + * as the measuring routine does not take into account cold caches | |
14137 | + * and the fact that xor_block_VIS bypasses the caches. | |
14138 | + * xor_block_32regs might be 5% faster for count 2 if caches are hot | |
14139 | + * and things are just right (for count 3 VIS is about as fast as 32regs | |
14140 | + * for hot caches, and for counts 4 and 5 VIS is always faster by a good | |
14141 | + * margin), but I think it is better not to pollute the caches. | |
14142 | + * Actually, if I were only fighting for speed on hot caches, I could | |
14143 | + * write a hybrid VIS/integer routine, which would always do two | |
14144 | + * 64B blocks in VIS and two in the IEUs, but I really care more about | |
14145 | + * caches. | |
14146 | + */ | |
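Editor's note: the requirements quoted above (every buffer 64-byte aligned, length a multiple of 128 and at least 256 bytes) are what xor_block_VIS relies on but never verifies. A caller-side check would look roughly like the user-space sketch below; the function name is invented and the aligned-attribute syntax assumes gcc.

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

void check_vis_args(void **bufs, unsigned int count, size_t len)
{
	uintptr_t or_of_addrs = 0;
	unsigned int i;

	for (i = 0; i < count; i++)
		or_of_addrs |= (uintptr_t)bufs[i];

	assert(!(or_of_addrs & (64 - 1)));	/* 64-byte alignment of every buffer */
	assert(!(len & 127));			/* length is a multiple of 128 */
	assert(len >= 256);			/* and at least 256 bytes long */
}

int main(void)
{
	static unsigned char a[512] __attribute__((aligned(64)));
	static unsigned char b[512] __attribute__((aligned(64)));
	void *bufs[2] = { a, b };

	check_vis_args(bufs, 2, sizeof(a));
	return 0;
}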
14147 | +extern void *VISenter(void); | |
14148 | +extern void xor_block_VIS XOR_ARGS; | |
14149 | + | |
14150 | +void __xor_block_VIS(void) | |
14151 | +{ | |
14152 | +__asm__ (" | |
14153 | + .globl xor_block_VIS | |
14154 | +xor_block_VIS: | |
14155 | + ldx [%%o1 + 0], %%o4 | |
14156 | + ldx [%%o1 + 8], %%o3 | |
14157 | + ldx [%%o4 + %1], %%g5 | |
14158 | + ldx [%%o4 + %0], %%o4 | |
14159 | + ldx [%%o3 + %0], %%o3 | |
14160 | + rd %%fprs, %%o5 | |
14161 | + andcc %%o5, %2, %%g0 | |
14162 | + be,pt %%icc, 297f | |
14163 | + sethi %%hi(%5), %%g1 | |
14164 | + jmpl %%g1 + %%lo(%5), %%g7 | |
14165 | + add %%g7, 8, %%g7 | |
14166 | +297: wr %%g0, %4, %%fprs | |
14167 | + membar #LoadStore|#StoreLoad|#StoreStore | |
14168 | + sub %%g5, 64, %%g5 | |
14169 | + ldda [%%o4] %3, %%f0 | |
14170 | + ldda [%%o3] %3, %%f16 | |
14171 | + cmp %%o0, 4 | |
14172 | + bgeu,pt %%xcc, 10f | |
14173 | + cmp %%o0, 3 | |
14174 | + be,pn %%xcc, 13f | |
14175 | + mov -64, %%g1 | |
14176 | + sub %%g5, 64, %%g5 | |
14177 | + rd %%asi, %%g1 | |
14178 | + wr %%g0, %3, %%asi | |
14179 | + | |
14180 | +2: ldda [%%o4 + 64] %%asi, %%f32 | |
14181 | + fxor %%f0, %%f16, %%f16 | |
14182 | + fxor %%f2, %%f18, %%f18 | |
14183 | + fxor %%f4, %%f20, %%f20 | |
14184 | + fxor %%f6, %%f22, %%f22 | |
14185 | + fxor %%f8, %%f24, %%f24 | |
14186 | + fxor %%f10, %%f26, %%f26 | |
14187 | + fxor %%f12, %%f28, %%f28 | |
14188 | + fxor %%f14, %%f30, %%f30 | |
14189 | + stda %%f16, [%%o4] %3 | |
14190 | + ldda [%%o3 + 64] %%asi, %%f48 | |
14191 | + ldda [%%o4 + 128] %%asi, %%f0 | |
14192 | + fxor %%f32, %%f48, %%f48 | |
14193 | + fxor %%f34, %%f50, %%f50 | |
14194 | + add %%o4, 128, %%o4 | |
14195 | + fxor %%f36, %%f52, %%f52 | |
14196 | + add %%o3, 128, %%o3 | |
14197 | + fxor %%f38, %%f54, %%f54 | |
14198 | + subcc %%g5, 128, %%g5 | |
14199 | + fxor %%f40, %%f56, %%f56 | |
14200 | + fxor %%f42, %%f58, %%f58 | |
14201 | + fxor %%f44, %%f60, %%f60 | |
14202 | + fxor %%f46, %%f62, %%f62 | |
14203 | + stda %%f48, [%%o4 - 64] %%asi | |
14204 | + bne,pt %%xcc, 2b | |
14205 | + ldda [%%o3] %3, %%f16 | |
14206 | + | |
14207 | + ldda [%%o4 + 64] %%asi, %%f32 | |
14208 | + fxor %%f0, %%f16, %%f16 | |
14209 | + fxor %%f2, %%f18, %%f18 | |
14210 | + fxor %%f4, %%f20, %%f20 | |
14211 | + fxor %%f6, %%f22, %%f22 | |
14212 | + fxor %%f8, %%f24, %%f24 | |
14213 | + fxor %%f10, %%f26, %%f26 | |
14214 | + fxor %%f12, %%f28, %%f28 | |
14215 | + fxor %%f14, %%f30, %%f30 | |
14216 | + stda %%f16, [%%o4] %3 | |
14217 | + ldda [%%o3 + 64] %%asi, %%f48 | |
14218 | + membar #Sync | |
14219 | + fxor %%f32, %%f48, %%f48 | |
14220 | + fxor %%f34, %%f50, %%f50 | |
14221 | + fxor %%f36, %%f52, %%f52 | |
14222 | + fxor %%f38, %%f54, %%f54 | |
14223 | + fxor %%f40, %%f56, %%f56 | |
14224 | + fxor %%f42, %%f58, %%f58 | |
14225 | + fxor %%f44, %%f60, %%f60 | |
14226 | + fxor %%f46, %%f62, %%f62 | |
14227 | + stda %%f48, [%%o4 + 64] %%asi | |
14228 | + membar #Sync|#StoreStore|#StoreLoad | |
14229 | + wr %%g0, 0, %%fprs | |
14230 | + retl | |
14231 | + wr %%g1, %%g0, %%asi | |
14232 | + | |
14233 | +13: ldx [%%o1 + 16], %%o2 | |
14234 | + ldx [%%o2 + %0], %%o2 | |
14235 | + | |
14236 | +3: ldda [%%o2] %3, %%f32 | |
14237 | + fxor %%f0, %%f16, %%f48 | |
14238 | + fxor %%f2, %%f18, %%f50 | |
14239 | + add %%o4, 64, %%o4 | |
14240 | + fxor %%f4, %%f20, %%f52 | |
14241 | + fxor %%f6, %%f22, %%f54 | |
14242 | + add %%o3, 64, %%o3 | |
14243 | + fxor %%f8, %%f24, %%f56 | |
14244 | + fxor %%f10, %%f26, %%f58 | |
14245 | + fxor %%f12, %%f28, %%f60 | |
14246 | + fxor %%f14, %%f30, %%f62 | |
14247 | + ldda [%%o4] %3, %%f0 | |
14248 | + fxor %%f48, %%f32, %%f48 | |
14249 | + fxor %%f50, %%f34, %%f50 | |
14250 | + fxor %%f52, %%f36, %%f52 | |
14251 | + fxor %%f54, %%f38, %%f54 | |
14252 | + add %%o2, 64, %%o2 | |
14253 | + fxor %%f56, %%f40, %%f56 | |
14254 | + fxor %%f58, %%f42, %%f58 | |
14255 | + subcc %%g5, 64, %%g5 | |
14256 | + fxor %%f60, %%f44, %%f60 | |
14257 | + fxor %%f62, %%f46, %%f62 | |
14258 | + stda %%f48, [%%o4 + %%g1] %3 | |
14259 | + bne,pt %%xcc, 3b | |
14260 | + ldda [%%o3] %3, %%f16 | |
14261 | + | |
14262 | + ldda [%%o2] %3, %%f32 | |
14263 | + fxor %%f0, %%f16, %%f48 | |
14264 | + fxor %%f2, %%f18, %%f50 | |
14265 | + fxor %%f4, %%f20, %%f52 | |
14266 | + fxor %%f6, %%f22, %%f54 | |
14267 | + fxor %%f8, %%f24, %%f56 | |
14268 | + fxor %%f10, %%f26, %%f58 | |
14269 | + fxor %%f12, %%f28, %%f60 | |
14270 | + fxor %%f14, %%f30, %%f62 | |
14271 | + membar #Sync | |
14272 | + fxor %%f48, %%f32, %%f48 | |
14273 | + fxor %%f50, %%f34, %%f50 | |
14274 | + fxor %%f52, %%f36, %%f52 | |
14275 | + fxor %%f54, %%f38, %%f54 | |
14276 | + fxor %%f56, %%f40, %%f56 | |
14277 | + fxor %%f58, %%f42, %%f58 | |
14278 | + fxor %%f60, %%f44, %%f60 | |
14279 | + fxor %%f62, %%f46, %%f62 | |
14280 | + stda %%f48, [%%o4] %3 | |
14281 | + membar #Sync|#StoreStore|#StoreLoad | |
14282 | + retl | |
14283 | + wr %%g0, 0, %%fprs | |
14284 | + | |
14285 | +10: cmp %%o0, 5 | |
14286 | + be,pt %%xcc, 15f | |
14287 | + mov -64, %%g1 | |
14288 | + | |
14289 | +14: ldx [%%o1 + 16], %%o2 | |
14290 | + ldx [%%o1 + 24], %%o0 | |
14291 | + ldx [%%o2 + %0], %%o2 | |
14292 | + ldx [%%o0 + %0], %%o0 | |
14293 | + | |
14294 | +4: ldda [%%o2] %3, %%f32 | |
14295 | + fxor %%f0, %%f16, %%f16 | |
14296 | + fxor %%f2, %%f18, %%f18 | |
14297 | + add %%o4, 64, %%o4 | |
14298 | + fxor %%f4, %%f20, %%f20 | |
14299 | + fxor %%f6, %%f22, %%f22 | |
14300 | + add %%o3, 64, %%o3 | |
14301 | + fxor %%f8, %%f24, %%f24 | |
14302 | + fxor %%f10, %%f26, %%f26 | |
14303 | + fxor %%f12, %%f28, %%f28 | |
14304 | + fxor %%f14, %%f30, %%f30 | |
14305 | + ldda [%%o0] %3, %%f48 | |
14306 | + fxor %%f16, %%f32, %%f32 | |
14307 | + fxor %%f18, %%f34, %%f34 | |
14308 | + fxor %%f20, %%f36, %%f36 | |
14309 | + fxor %%f22, %%f38, %%f38 | |
14310 | + add %%o2, 64, %%o2 | |
14311 | + fxor %%f24, %%f40, %%f40 | |
14312 | + fxor %%f26, %%f42, %%f42 | |
14313 | + fxor %%f28, %%f44, %%f44 | |
14314 | + fxor %%f30, %%f46, %%f46 | |
14315 | + ldda [%%o4] %3, %%f0 | |
14316 | + fxor %%f32, %%f48, %%f48 | |
14317 | + fxor %%f34, %%f50, %%f50 | |
14318 | + fxor %%f36, %%f52, %%f52 | |
14319 | + add %%o0, 64, %%o0 | |
14320 | + fxor %%f38, %%f54, %%f54 | |
14321 | + fxor %%f40, %%f56, %%f56 | |
14322 | + fxor %%f42, %%f58, %%f58 | |
14323 | + subcc %%g5, 64, %%g5 | |
14324 | + fxor %%f44, %%f60, %%f60 | |
14325 | + fxor %%f46, %%f62, %%f62 | |
14326 | + stda %%f48, [%%o4 + %%g1] %3 | |
14327 | + bne,pt %%xcc, 4b | |
14328 | + ldda [%%o3] %3, %%f16 | |
14329 | + | |
14330 | + ldda [%%o2] %3, %%f32 | |
14331 | + fxor %%f0, %%f16, %%f16 | |
14332 | + fxor %%f2, %%f18, %%f18 | |
14333 | + fxor %%f4, %%f20, %%f20 | |
14334 | + fxor %%f6, %%f22, %%f22 | |
14335 | + fxor %%f8, %%f24, %%f24 | |
14336 | + fxor %%f10, %%f26, %%f26 | |
14337 | + fxor %%f12, %%f28, %%f28 | |
14338 | + fxor %%f14, %%f30, %%f30 | |
14339 | + ldda [%%o0] %3, %%f48 | |
14340 | + fxor %%f16, %%f32, %%f32 | |
14341 | + fxor %%f18, %%f34, %%f34 | |
14342 | + fxor %%f20, %%f36, %%f36 | |
14343 | + fxor %%f22, %%f38, %%f38 | |
14344 | + fxor %%f24, %%f40, %%f40 | |
14345 | + fxor %%f26, %%f42, %%f42 | |
14346 | + fxor %%f28, %%f44, %%f44 | |
14347 | + fxor %%f30, %%f46, %%f46 | |
14348 | + membar #Sync | |
14349 | + fxor %%f32, %%f48, %%f48 | |
14350 | + fxor %%f34, %%f50, %%f50 | |
14351 | + fxor %%f36, %%f52, %%f52 | |
14352 | + fxor %%f38, %%f54, %%f54 | |
14353 | + fxor %%f40, %%f56, %%f56 | |
14354 | + fxor %%f42, %%f58, %%f58 | |
14355 | + fxor %%f44, %%f60, %%f60 | |
14356 | + fxor %%f46, %%f62, %%f62 | |
14357 | + stda %%f48, [%%o4] %3 | |
14358 | + membar #Sync|#StoreStore|#StoreLoad | |
14359 | + retl | |
14360 | + wr %%g0, 0, %%fprs | |
14361 | + | |
14362 | +15: ldx [%%o1 + 16], %%o2 | |
14363 | + ldx [%%o1 + 24], %%o0 | |
14364 | + ldx [%%o1 + 32], %%o1 | |
14365 | + ldx [%%o2 + %0], %%o2 | |
14366 | + ldx [%%o0 + %0], %%o0 | |
14367 | + ldx [%%o1 + %0], %%o1 | |
14368 | + | |
14369 | +5: ldda [%%o2] %3, %%f32 | |
14370 | + fxor %%f0, %%f16, %%f48 | |
14371 | + fxor %%f2, %%f18, %%f50 | |
14372 | + add %%o4, 64, %%o4 | |
14373 | + fxor %%f4, %%f20, %%f52 | |
14374 | + fxor %%f6, %%f22, %%f54 | |
14375 | + add %%o3, 64, %%o3 | |
14376 | + fxor %%f8, %%f24, %%f56 | |
14377 | + fxor %%f10, %%f26, %%f58 | |
14378 | + fxor %%f12, %%f28, %%f60 | |
14379 | + fxor %%f14, %%f30, %%f62 | |
14380 | + ldda [%%o0] %3, %%f16 | |
14381 | + fxor %%f48, %%f32, %%f48 | |
14382 | + fxor %%f50, %%f34, %%f50 | |
14383 | + fxor %%f52, %%f36, %%f52 | |
14384 | + fxor %%f54, %%f38, %%f54 | |
14385 | + add %%o2, 64, %%o2 | |
14386 | + fxor %%f56, %%f40, %%f56 | |
14387 | + fxor %%f58, %%f42, %%f58 | |
14388 | + fxor %%f60, %%f44, %%f60 | |
14389 | + fxor %%f62, %%f46, %%f62 | |
14390 | + ldda [%%o1] %3, %%f32 | |
14391 | + fxor %%f48, %%f16, %%f48 | |
14392 | + fxor %%f50, %%f18, %%f50 | |
14393 | + add %%o0, 64, %%o0 | |
14394 | + fxor %%f52, %%f20, %%f52 | |
14395 | + fxor %%f54, %%f22, %%f54 | |
14396 | + add %%o1, 64, %%o1 | |
14397 | + fxor %%f56, %%f24, %%f56 | |
14398 | + fxor %%f58, %%f26, %%f58 | |
14399 | + fxor %%f60, %%f28, %%f60 | |
14400 | + fxor %%f62, %%f30, %%f62 | |
14401 | + ldda [%%o4] %3, %%f0 | |
14402 | + fxor %%f48, %%f32, %%f48 | |
14403 | + fxor %%f50, %%f34, %%f50 | |
14404 | + fxor %%f52, %%f36, %%f52 | |
14405 | + fxor %%f54, %%f38, %%f54 | |
14406 | + fxor %%f56, %%f40, %%f56 | |
14407 | + fxor %%f58, %%f42, %%f58 | |
14408 | + subcc %%g5, 64, %%g5 | |
14409 | + fxor %%f60, %%f44, %%f60 | |
14410 | + fxor %%f62, %%f46, %%f62 | |
14411 | + stda %%f48, [%%o4 + %%g1] %3 | |
14412 | + bne,pt %%xcc, 5b | |
14413 | + ldda [%%o3] %3, %%f16 | |
14414 | + | |
14415 | + ldda [%%o2] %3, %%f32 | |
14416 | + fxor %%f0, %%f16, %%f48 | |
14417 | + fxor %%f2, %%f18, %%f50 | |
14418 | + fxor %%f4, %%f20, %%f52 | |
14419 | + fxor %%f6, %%f22, %%f54 | |
14420 | + fxor %%f8, %%f24, %%f56 | |
14421 | + fxor %%f10, %%f26, %%f58 | |
14422 | + fxor %%f12, %%f28, %%f60 | |
14423 | + fxor %%f14, %%f30, %%f62 | |
14424 | + ldda [%%o0] %3, %%f16 | |
14425 | + fxor %%f48, %%f32, %%f48 | |
14426 | + fxor %%f50, %%f34, %%f50 | |
14427 | + fxor %%f52, %%f36, %%f52 | |
14428 | + fxor %%f54, %%f38, %%f54 | |
14429 | + fxor %%f56, %%f40, %%f56 | |
14430 | + fxor %%f58, %%f42, %%f58 | |
14431 | + fxor %%f60, %%f44, %%f60 | |
14432 | + fxor %%f62, %%f46, %%f62 | |
14433 | + ldda [%%o1] %3, %%f32 | |
14434 | + fxor %%f48, %%f16, %%f48 | |
14435 | + fxor %%f50, %%f18, %%f50 | |
14436 | + fxor %%f52, %%f20, %%f52 | |
14437 | + fxor %%f54, %%f22, %%f54 | |
14438 | + fxor %%f56, %%f24, %%f56 | |
14439 | + fxor %%f58, %%f26, %%f58 | |
14440 | + fxor %%f60, %%f28, %%f60 | |
14441 | + fxor %%f62, %%f30, %%f62 | |
14442 | + membar #Sync | |
14443 | + fxor %%f48, %%f32, %%f48 | |
14444 | + fxor %%f50, %%f34, %%f50 | |
14445 | + fxor %%f52, %%f36, %%f52 | |
14446 | + fxor %%f54, %%f38, %%f54 | |
14447 | + fxor %%f56, %%f40, %%f56 | |
14448 | + fxor %%f58, %%f42, %%f58 | |
14449 | + fxor %%f60, %%f44, %%f60 | |
14450 | + fxor %%f62, %%f46, %%f62 | |
14451 | + stda %%f48, [%%o4] %3 | |
14452 | + membar #Sync|#StoreStore|#StoreLoad | |
14453 | + retl | |
14454 | + wr %%g0, 0, %%fprs | |
14455 | + " : : | |
14456 | + "i" (&((struct buffer_head *)0)->b_data), | |
14457 | + "i" (&((struct buffer_head *)0)->b_size), | |
14458 | + "i" (FPRS_FEF|FPRS_DU), "i" (ASI_BLK_P), | |
14459 | + "i" (FPRS_FEF), "i" (VISenter)); | |
14460 | +} | |
14461 | +#endif /* __sparc_v9__ */ | |
14462 | + | |
14463 | +#if defined(__sparc__) && !defined(__sparc_v9__) | |
14464 | +/* | |
14465 | + * High speed xor_block operation for RAID4/5 utilizing the | |
14466 | + * ldd/std SPARC instructions. | |
14467 | + * | |
14468 | + * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) | |
14469 | + * | |
14470 | + */ | |
14471 | + | |
14472 | +XORBLOCK_TEMPLATE(SPARC) | |
14473 | +{ | |
14474 | + int size = bh_ptr[0]->b_size; | |
14475 | + int lines = size / (sizeof (long)) / 8, i; | |
14476 | + long *destp = (long *) bh_ptr[0]->b_data; | |
14477 | + long *source1 = (long *) bh_ptr[1]->b_data; | |
14478 | + long *source2, *source3, *source4; | |
14479 | + | |
14480 | + switch (count) { | |
14481 | + case 2: | |
14482 | + for (i = lines; i > 0; i--) { | |
14483 | + __asm__ __volatile__(" | |
14484 | + ldd [%0 + 0x00], %%g2 | |
14485 | + ldd [%0 + 0x08], %%g4 | |
14486 | + ldd [%0 + 0x10], %%o0 | |
14487 | + ldd [%0 + 0x18], %%o2 | |
14488 | + ldd [%1 + 0x00], %%o4 | |
14489 | + ldd [%1 + 0x08], %%l0 | |
14490 | + ldd [%1 + 0x10], %%l2 | |
14491 | + ldd [%1 + 0x18], %%l4 | |
14492 | + xor %%g2, %%o4, %%g2 | |
14493 | + xor %%g3, %%o5, %%g3 | |
14494 | + xor %%g4, %%l0, %%g4 | |
14495 | + xor %%g5, %%l1, %%g5 | |
14496 | + xor %%o0, %%l2, %%o0 | |
14497 | + xor %%o1, %%l3, %%o1 | |
14498 | + xor %%o2, %%l4, %%o2 | |
14499 | + xor %%o3, %%l5, %%o3 | |
14500 | + std %%g2, [%0 + 0x00] | |
14501 | + std %%g4, [%0 + 0x08] | |
14502 | + std %%o0, [%0 + 0x10] | |
14503 | + std %%o2, [%0 + 0x18] | |
14504 | + " : : "r" (destp), "r" (source1) : "g2", "g3", "g4", "g5", "o0", | |
14505 | + "o1", "o2", "o3", "o4", "o5", "l0", "l1", "l2", "l3", "l4", "l5"); | |
14506 | + destp += 8; | |
14507 | + source1 += 8; | |
14508 | + } | |
14509 | + break; | |
14510 | + case 3: | |
14511 | + source2 = (long *) bh_ptr[2]->b_data; | |
14512 | + for (i = lines; i > 0; i--) { | |
14513 | + __asm__ __volatile__(" | |
14514 | + ldd [%0 + 0x00], %%g2 | |
14515 | + ldd [%0 + 0x08], %%g4 | |
14516 | + ldd [%0 + 0x10], %%o0 | |
14517 | + ldd [%0 + 0x18], %%o2 | |
14518 | + ldd [%1 + 0x00], %%o4 | |
14519 | + ldd [%1 + 0x08], %%l0 | |
14520 | + ldd [%1 + 0x10], %%l2 | |
14521 | + ldd [%1 + 0x18], %%l4 | |
14522 | + xor %%g2, %%o4, %%g2 | |
14523 | + xor %%g3, %%o5, %%g3 | |
14524 | + ldd [%2 + 0x00], %%o4 | |
14525 | + xor %%g4, %%l0, %%g4 | |
14526 | + xor %%g5, %%l1, %%g5 | |
14527 | + ldd [%2 + 0x08], %%l0 | |
14528 | + xor %%o0, %%l2, %%o0 | |
14529 | + xor %%o1, %%l3, %%o1 | |
14530 | + ldd [%2 + 0x10], %%l2 | |
14531 | + xor %%o2, %%l4, %%o2 | |
14532 | + xor %%o3, %%l5, %%o3 | |
14533 | + ldd [%2 + 0x18], %%l4 | |
14534 | + xor %%g2, %%o4, %%g2 | |
14535 | + xor %%g3, %%o5, %%g3 | |
14536 | + xor %%g4, %%l0, %%g4 | |
14537 | + xor %%g5, %%l1, %%g5 | |
14538 | + xor %%o0, %%l2, %%o0 | |
14539 | + xor %%o1, %%l3, %%o1 | |
14540 | + xor %%o2, %%l4, %%o2 | |
14541 | + xor %%o3, %%l5, %%o3 | |
14542 | + std %%g2, [%0 + 0x00] | |
14543 | + std %%g4, [%0 + 0x08] | |
14544 | + std %%o0, [%0 + 0x10] | |
14545 | + std %%o2, [%0 + 0x18] | |
14546 | + " : : "r" (destp), "r" (source1), "r" (source2) | |
14547 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
14548 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
14549 | + destp += 8; | |
14550 | + source1 += 8; | |
14551 | + source2 += 8; | |
14552 | + } | |
14553 | + break; | |
14554 | + case 4: | |
14555 | + source2 = (long *) bh_ptr[2]->b_data; | |
14556 | + source3 = (long *) bh_ptr[3]->b_data; | |
14557 | + for (i = lines; i > 0; i--) { | |
14558 | + __asm__ __volatile__(" | |
14559 | + ldd [%0 + 0x00], %%g2 | |
14560 | + ldd [%0 + 0x08], %%g4 | |
14561 | + ldd [%0 + 0x10], %%o0 | |
14562 | + ldd [%0 + 0x18], %%o2 | |
14563 | + ldd [%1 + 0x00], %%o4 | |
14564 | + ldd [%1 + 0x08], %%l0 | |
14565 | + ldd [%1 + 0x10], %%l2 | |
14566 | + ldd [%1 + 0x18], %%l4 | |
14567 | + xor %%g2, %%o4, %%g2 | |
14568 | + xor %%g3, %%o5, %%g3 | |
14569 | + ldd [%2 + 0x00], %%o4 | |
14570 | + xor %%g4, %%l0, %%g4 | |
14571 | + xor %%g5, %%l1, %%g5 | |
14572 | + ldd [%2 + 0x08], %%l0 | |
14573 | + xor %%o0, %%l2, %%o0 | |
14574 | + xor %%o1, %%l3, %%o1 | |
14575 | + ldd [%2 + 0x10], %%l2 | |
14576 | + xor %%o2, %%l4, %%o2 | |
14577 | + xor %%o3, %%l5, %%o3 | |
14578 | + ldd [%2 + 0x18], %%l4 | |
14579 | + xor %%g2, %%o4, %%g2 | |
14580 | + xor %%g3, %%o5, %%g3 | |
14581 | + ldd [%3 + 0x00], %%o4 | |
14582 | + xor %%g4, %%l0, %%g4 | |
14583 | + xor %%g5, %%l1, %%g5 | |
14584 | + ldd [%3 + 0x08], %%l0 | |
14585 | + xor %%o0, %%l2, %%o0 | |
14586 | + xor %%o1, %%l3, %%o1 | |
14587 | + ldd [%3 + 0x10], %%l2 | |
14588 | + xor %%o2, %%l4, %%o2 | |
14589 | + xor %%o3, %%l5, %%o3 | |
14590 | + ldd [%3 + 0x18], %%l4 | |
14591 | + xor %%g2, %%o4, %%g2 | |
14592 | + xor %%g3, %%o5, %%g3 | |
14593 | + xor %%g4, %%l0, %%g4 | |
14594 | + xor %%g5, %%l1, %%g5 | |
14595 | + xor %%o0, %%l2, %%o0 | |
14596 | + xor %%o1, %%l3, %%o1 | |
14597 | + xor %%o2, %%l4, %%o2 | |
14598 | + xor %%o3, %%l5, %%o3 | |
14599 | + std %%g2, [%0 + 0x00] | |
14600 | + std %%g4, [%0 + 0x08] | |
14601 | + std %%o0, [%0 + 0x10] | |
14602 | + std %%o2, [%0 + 0x18] | |
14603 | + " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3) | |
14604 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
14605 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
14606 | + destp += 8; | |
14607 | + source1 += 8; | |
14608 | + source2 += 8; | |
14609 | + source3 += 8; | |
14610 | + } | |
14611 | + break; | |
14612 | + case 5: | |
14613 | + source2 = (long *) bh_ptr[2]->b_data; | |
14614 | + source3 = (long *) bh_ptr[3]->b_data; | |
14615 | + source4 = (long *) bh_ptr[4]->b_data; | |
14616 | + for (i = lines; i > 0; i--) { | |
14617 | + __asm__ __volatile__(" | |
14618 | + ldd [%0 + 0x00], %%g2 | |
14619 | + ldd [%0 + 0x08], %%g4 | |
14620 | + ldd [%0 + 0x10], %%o0 | |
14621 | + ldd [%0 + 0x18], %%o2 | |
14622 | + ldd [%1 + 0x00], %%o4 | |
14623 | + ldd [%1 + 0x08], %%l0 | |
14624 | + ldd [%1 + 0x10], %%l2 | |
14625 | + ldd [%1 + 0x18], %%l4 | |
14626 | + xor %%g2, %%o4, %%g2 | |
14627 | + xor %%g3, %%o5, %%g3 | |
14628 | + ldd [%2 + 0x00], %%o4 | |
14629 | + xor %%g4, %%l0, %%g4 | |
14630 | + xor %%g5, %%l1, %%g5 | |
14631 | + ldd [%2 + 0x08], %%l0 | |
14632 | + xor %%o0, %%l2, %%o0 | |
14633 | + xor %%o1, %%l3, %%o1 | |
14634 | + ldd [%2 + 0x10], %%l2 | |
14635 | + xor %%o2, %%l4, %%o2 | |
14636 | + xor %%o3, %%l5, %%o3 | |
14637 | + ldd [%2 + 0x18], %%l4 | |
14638 | + xor %%g2, %%o4, %%g2 | |
14639 | + xor %%g3, %%o5, %%g3 | |
14640 | + ldd [%3 + 0x00], %%o4 | |
14641 | + xor %%g4, %%l0, %%g4 | |
14642 | + xor %%g5, %%l1, %%g5 | |
14643 | + ldd [%3 + 0x08], %%l0 | |
14644 | + xor %%o0, %%l2, %%o0 | |
14645 | + xor %%o1, %%l3, %%o1 | |
14646 | + ldd [%3 + 0x10], %%l2 | |
14647 | + xor %%o2, %%l4, %%o2 | |
14648 | + xor %%o3, %%l5, %%o3 | |
14649 | + ldd [%3 + 0x18], %%l4 | |
14650 | + xor %%g2, %%o4, %%g2 | |
14651 | + xor %%g3, %%o5, %%g3 | |
14652 | + ldd [%4 + 0x00], %%o4 | |
14653 | + xor %%g4, %%l0, %%g4 | |
14654 | + xor %%g5, %%l1, %%g5 | |
14655 | + ldd [%4 + 0x08], %%l0 | |
14656 | + xor %%o0, %%l2, %%o0 | |
14657 | + xor %%o1, %%l3, %%o1 | |
14658 | + ldd [%4 + 0x10], %%l2 | |
14659 | + xor %%o2, %%l4, %%o2 | |
14660 | + xor %%o3, %%l5, %%o3 | |
14661 | + ldd [%4 + 0x18], %%l4 | |
14662 | + xor %%g2, %%o4, %%g2 | |
14663 | + xor %%g3, %%o5, %%g3 | |
14664 | + xor %%g4, %%l0, %%g4 | |
14665 | + xor %%g5, %%l1, %%g5 | |
14666 | + xor %%o0, %%l2, %%o0 | |
14667 | + xor %%o1, %%l3, %%o1 | |
14668 | + xor %%o2, %%l4, %%o2 | |
14669 | + xor %%o3, %%l5, %%o3 | |
14670 | + std %%g2, [%0 + 0x00] | |
14671 | + std %%g4, [%0 + 0x08] | |
14672 | + std %%o0, [%0 + 0x10] | |
14673 | + std %%o2, [%0 + 0x18] | |
14674 | + " : : "r" (destp), "r" (source1), "r" (source2), "r" (source3), "r" (source4) | |
14675 | + : "g2", "g3", "g4", "g5", "o0", "o1", "o2", "o3", "o4", "o5", | |
14676 | + "l0", "l1", "l2", "l3", "l4", "l5"); | |
14677 | + destp += 8; | |
14678 | + source1 += 8; | |
14679 | + source2 += 8; | |
14680 | + source3 += 8; | |
14681 | + source4 += 8; | |
14682 | + } | |
14683 | + break; | |
14684 | + } | |
14685 | +} | |
14686 | +#endif /* __sparc_v[78]__ */ | |
14687 | + | |
14688 | +#ifndef __sparc_v9__ | |
14689 | + | |
14690 | +/* | |
14691 | + * this one works reasonably on any x86 CPU | |
14692 | + * (send me an assembly version for inclusion if you can make it faster) | |
14693 | + * | |
14694 | + * this one is just as fast as written in pure assembly on x86. | |
14695 | + * the reason for this separate version is that the | |
14696 | + * fast open-coded xor routine "32regs" produces suboptimal code | |
14697 | + * on x86, due to lack of registers. | |
14698 | + */ | |
14699 | +XORBLOCK_TEMPLATE(8regs) | |
14700 | +{ | |
14701 | + int len = bh_ptr[0]->b_size; | |
14702 | + long *destp = (long *) bh_ptr[0]->b_data; | |
14703 | + long *source1, *source2, *source3, *source4; | |
14704 | + long lines = len / (sizeof (long)) / 8, i; | |
14705 | + | |
14706 | + switch(count) { | |
14707 | + case 2: | |
14708 | + source1 = (long *) bh_ptr[1]->b_data; | |
14709 | + for (i = lines; i > 0; i--) { | |
14710 | + *(destp + 0) ^= *(source1 + 0); | |
14711 | + *(destp + 1) ^= *(source1 + 1); | |
14712 | + *(destp + 2) ^= *(source1 + 2); | |
14713 | + *(destp + 3) ^= *(source1 + 3); | |
14714 | + *(destp + 4) ^= *(source1 + 4); | |
14715 | + *(destp + 5) ^= *(source1 + 5); | |
14716 | + *(destp + 6) ^= *(source1 + 6); | |
14717 | + *(destp + 7) ^= *(source1 + 7); | |
14718 | + source1 += 8; | |
14719 | + destp += 8; | |
14720 | + } | |
14721 | + break; | |
14722 | + case 3: | |
14723 | + source2 = (long *) bh_ptr[2]->b_data; | |
14724 | + source1 = (long *) bh_ptr[1]->b_data; | |
14725 | + for (i = lines; i > 0; i--) { | |
14726 | + *(destp + 0) ^= *(source1 + 0); | |
14727 | + *(destp + 0) ^= *(source2 + 0); | |
14728 | + *(destp + 1) ^= *(source1 + 1); | |
14729 | + *(destp + 1) ^= *(source2 + 1); | |
14730 | + *(destp + 2) ^= *(source1 + 2); | |
14731 | + *(destp + 2) ^= *(source2 + 2); | |
14732 | + *(destp + 3) ^= *(source1 + 3); | |
14733 | + *(destp + 3) ^= *(source2 + 3); | |
14734 | + *(destp + 4) ^= *(source1 + 4); | |
14735 | + *(destp + 4) ^= *(source2 + 4); | |
14736 | + *(destp + 5) ^= *(source1 + 5); | |
14737 | + *(destp + 5) ^= *(source2 + 5); | |
14738 | + *(destp + 6) ^= *(source1 + 6); | |
14739 | + *(destp + 6) ^= *(source2 + 6); | |
14740 | + *(destp + 7) ^= *(source1 + 7); | |
14741 | + *(destp + 7) ^= *(source2 + 7); | |
14742 | + source1 += 8; | |
14743 | + source2 += 8; | |
14744 | + destp += 8; | |
14745 | + } | |
14746 | + break; | |
14747 | + case 4: | |
14748 | + source3 = (long *) bh_ptr[3]->b_data; | |
14749 | + source2 = (long *) bh_ptr[2]->b_data; | |
14750 | + source1 = (long *) bh_ptr[1]->b_data; | |
14751 | + for (i = lines; i > 0; i--) { | |
14752 | + *(destp + 0) ^= *(source1 + 0); | |
14753 | + *(destp + 0) ^= *(source2 + 0); | |
14754 | + *(destp + 0) ^= *(source3 + 0); | |
14755 | + *(destp + 1) ^= *(source1 + 1); | |
14756 | + *(destp + 1) ^= *(source2 + 1); | |
14757 | + *(destp + 1) ^= *(source3 + 1); | |
14758 | + *(destp + 2) ^= *(source1 + 2); | |
14759 | + *(destp + 2) ^= *(source2 + 2); | |
14760 | + *(destp + 2) ^= *(source3 + 2); | |
14761 | + *(destp + 3) ^= *(source1 + 3); | |
14762 | + *(destp + 3) ^= *(source2 + 3); | |
14763 | + *(destp + 3) ^= *(source3 + 3); | |
14764 | + *(destp + 4) ^= *(source1 + 4); | |
14765 | + *(destp + 4) ^= *(source2 + 4); | |
14766 | + *(destp + 4) ^= *(source3 + 4); | |
14767 | + *(destp + 5) ^= *(source1 + 5); | |
14768 | + *(destp + 5) ^= *(source2 + 5); | |
14769 | + *(destp + 5) ^= *(source3 + 5); | |
14770 | + *(destp + 6) ^= *(source1 + 6); | |
14771 | + *(destp + 6) ^= *(source2 + 6); | |
14772 | + *(destp + 6) ^= *(source3 + 6); | |
14773 | + *(destp + 7) ^= *(source1 + 7); | |
14774 | + *(destp + 7) ^= *(source2 + 7); | |
14775 | + *(destp + 7) ^= *(source3 + 7); | |
14776 | + source1 += 8; | |
14777 | + source2 += 8; | |
14778 | + source3 += 8; | |
14779 | + destp += 8; | |
14780 | + } | |
14781 | + break; | |
14782 | + case 5: | |
14783 | + source4 = (long *) bh_ptr[4]->b_data; | |
14784 | + source3 = (long *) bh_ptr[3]->b_data; | |
14785 | + source2 = (long *) bh_ptr[2]->b_data; | |
14786 | + source1 = (long *) bh_ptr[1]->b_data; | |
14787 | + for (i = lines; i > 0; i--) { | |
14788 | + *(destp + 0) ^= *(source1 + 0); | |
14789 | + *(destp + 0) ^= *(source2 + 0); | |
14790 | + *(destp + 0) ^= *(source3 + 0); | |
14791 | + *(destp + 0) ^= *(source4 + 0); | |
14792 | + *(destp + 1) ^= *(source1 + 1); | |
14793 | + *(destp + 1) ^= *(source2 + 1); | |
14794 | + *(destp + 1) ^= *(source3 + 1); | |
14795 | + *(destp + 1) ^= *(source4 + 1); | |
14796 | + *(destp + 2) ^= *(source1 + 2); | |
14797 | + *(destp + 2) ^= *(source2 + 2); | |
14798 | + *(destp + 2) ^= *(source3 + 2); | |
14799 | + *(destp + 2) ^= *(source4 + 2); | |
14800 | + *(destp + 3) ^= *(source1 + 3); | |
14801 | + *(destp + 3) ^= *(source2 + 3); | |
14802 | + *(destp + 3) ^= *(source3 + 3); | |
14803 | + *(destp + 3) ^= *(source4 + 3); | |
14804 | + *(destp + 4) ^= *(source1 + 4); | |
14805 | + *(destp + 4) ^= *(source2 + 4); | |
14806 | + *(destp + 4) ^= *(source3 + 4); | |
14807 | + *(destp + 4) ^= *(source4 + 4); | |
14808 | + *(destp + 5) ^= *(source1 + 5); | |
14809 | + *(destp + 5) ^= *(source2 + 5); | |
14810 | + *(destp + 5) ^= *(source3 + 5); | |
14811 | + *(destp + 5) ^= *(source4 + 5); | |
14812 | + *(destp + 6) ^= *(source1 + 6); | |
14813 | + *(destp + 6) ^= *(source2 + 6); | |
14814 | + *(destp + 6) ^= *(source3 + 6); | |
14815 | + *(destp + 6) ^= *(source4 + 6); | |
14816 | + *(destp + 7) ^= *(source1 + 7); | |
14817 | + *(destp + 7) ^= *(source2 + 7); | |
14818 | + *(destp + 7) ^= *(source3 + 7); | |
14819 | + *(destp + 7) ^= *(source4 + 7); | |
14820 | + source1 += 8; | |
14821 | + source2 += 8; | |
14822 | + source3 += 8; | |
14823 | + source4 += 8; | |
14824 | + destp += 8; | |
14825 | + } | |
14826 | + break; | |
14827 | + } | |
14828 | +} | |
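Editorial note (not part of the patch): the 8regs template above is the portable fallback, an eight-way manually unrolled XOR over the buffer_head data. As a minimal sketch of the same technique outside the kernel context, the following standalone C program does the identical unrolled loop; the function name xor_8regs_demo and the demo main() are illustrative assumptions, and the word count is assumed to be a multiple of 8, as it is for the kernel's block sizes.

    #include <stdio.h>

    /* Eight-way unrolled XOR of 'src' into 'dest', mirroring the 8regs loop. */
    static void xor_8regs_demo(long *dest, const long *src, unsigned long words)
    {
        unsigned long i;

        for (i = 0; i < words; i += 8) {
            dest[i + 0] ^= src[i + 0];
            dest[i + 1] ^= src[i + 1];
            dest[i + 2] ^= src[i + 2];
            dest[i + 3] ^= src[i + 3];
            dest[i + 4] ^= src[i + 4];
            dest[i + 5] ^= src[i + 5];
            dest[i + 6] ^= src[i + 6];
            dest[i + 7] ^= src[i + 7];
        }
    }

    int main(void)
    {
        long a[16], b[16];
        int i;

        for (i = 0; i < 16; i++) {
            a[i] = i;
            b[i] = 0x55;
        }
        xor_8regs_demo(a, b, 16);
        printf("a[0] after xor: %ld\n", a[0]); /* 0 ^ 0x55 = 85 */
        return 0;
    }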
14829 | + | |
14830 | +/* | |
14831 | + * Platform-independent RAID5 checksum calculation; this should | |
14832 | + * be very fast on any platform that has a decent number of | |
14833 | + * registers (32 or more). | |
14834 | + */ | |
14835 | +XORBLOCK_TEMPLATE(32regs) | |
14836 | +{ | |
14837 | + int size = bh_ptr[0]->b_size; | |
14838 | + int lines = size / (sizeof (long)) / 8, i; | |
14839 | + long *destp = (long *) bh_ptr[0]->b_data; | |
14840 | + long *source1, *source2, *source3, *source4; | |
14841 | + | |
14842 | + /* LOTS of registers available... | |
14843 | + We do explicit loop-unrolling here for code which | |
14844 | + favours RISC machines. In fact this is almost direct | |
14845 | + RISC assembly on Alpha and SPARC :-) */ | |
14846 | + | |
14847 | + | |
14848 | + switch(count) { | |
14849 | + case 2: | |
14850 | + source1 = (long *) bh_ptr[1]->b_data; | |
14851 | + for (i = lines; i > 0; i--) { | |
14852 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
14853 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
14854 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
14855 | + d2 = destp[2]; | |
14856 | + d3 = destp[3]; | |
14857 | + d4 = destp[4]; | |
14858 | + d5 = destp[5]; | |
14859 | + d6 = destp[6]; | |
14860 | + d7 = destp[7]; | |
14861 | + d0 ^= source1[0]; | |
14862 | + d1 ^= source1[1]; | |
14863 | + d2 ^= source1[2]; | |
14864 | + d3 ^= source1[3]; | |
14865 | + d4 ^= source1[4]; | |
14866 | + d5 ^= source1[5]; | |
14867 | + d6 ^= source1[6]; | |
14868 | + d7 ^= source1[7]; | |
14869 | + destp[0] = d0; /* Store the result (in bursts) */ | |
14870 | + destp[1] = d1; | |
14871 | + destp[2] = d2; | |
14872 | + destp[3] = d3; | |
14873 | + destp[4] = d4; /* Store the result (in bursts) */ | |
14874 | + destp[5] = d5; | |
14875 | + destp[6] = d6; | |
14876 | + destp[7] = d7; | |
14877 | + source1 += 8; | |
14878 | + destp += 8; | |
14879 | + } | |
14880 | + break; | |
14881 | + case 3: | |
14882 | + source2 = (long *) bh_ptr[2]->b_data; | |
14883 | + source1 = (long *) bh_ptr[1]->b_data; | |
14884 | + for (i = lines; i > 0; i--) { | |
14885 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
14886 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
14887 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
14888 | + d2 = destp[2]; | |
14889 | + d3 = destp[3]; | |
14890 | + d4 = destp[4]; | |
14891 | + d5 = destp[5]; | |
14892 | + d6 = destp[6]; | |
14893 | + d7 = destp[7]; | |
14894 | + d0 ^= source1[0]; | |
14895 | + d1 ^= source1[1]; | |
14896 | + d2 ^= source1[2]; | |
14897 | + d3 ^= source1[3]; | |
14898 | + d4 ^= source1[4]; | |
14899 | + d5 ^= source1[5]; | |
14900 | + d6 ^= source1[6]; | |
14901 | + d7 ^= source1[7]; | |
14902 | + d0 ^= source2[0]; | |
14903 | + d1 ^= source2[1]; | |
14904 | + d2 ^= source2[2]; | |
14905 | + d3 ^= source2[3]; | |
14906 | + d4 ^= source2[4]; | |
14907 | + d5 ^= source2[5]; | |
14908 | + d6 ^= source2[6]; | |
14909 | + d7 ^= source2[7]; | |
14910 | + destp[0] = d0; /* Store the result (in bursts) */ | |
14911 | + destp[1] = d1; | |
14912 | + destp[2] = d2; | |
14913 | + destp[3] = d3; | |
14914 | + destp[4] = d4; /* Store the result (in bursts) */ | |
14915 | + destp[5] = d5; | |
14916 | + destp[6] = d6; | |
14917 | + destp[7] = d7; | |
14918 | + source1 += 8; | |
14919 | + source2 += 8; | |
14920 | + destp += 8; | |
14921 | + } | |
14922 | + break; | |
14923 | + case 4: | |
14924 | + source3 = (long *) bh_ptr[3]->b_data; | |
14925 | + source2 = (long *) bh_ptr[2]->b_data; | |
14926 | + source1 = (long *) bh_ptr[1]->b_data; | |
14927 | + for (i = lines; i > 0; i--) { | |
14928 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
14929 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
14930 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
14931 | + d2 = destp[2]; | |
14932 | + d3 = destp[3]; | |
14933 | + d4 = destp[4]; | |
14934 | + d5 = destp[5]; | |
14935 | + d6 = destp[6]; | |
14936 | + d7 = destp[7]; | |
14937 | + d0 ^= source1[0]; | |
14938 | + d1 ^= source1[1]; | |
14939 | + d2 ^= source1[2]; | |
14940 | + d3 ^= source1[3]; | |
14941 | + d4 ^= source1[4]; | |
14942 | + d5 ^= source1[5]; | |
14943 | + d6 ^= source1[6]; | |
14944 | + d7 ^= source1[7]; | |
14945 | + d0 ^= source2[0]; | |
14946 | + d1 ^= source2[1]; | |
14947 | + d2 ^= source2[2]; | |
14948 | + d3 ^= source2[3]; | |
14949 | + d4 ^= source2[4]; | |
14950 | + d5 ^= source2[5]; | |
14951 | + d6 ^= source2[6]; | |
14952 | + d7 ^= source2[7]; | |
14953 | + d0 ^= source3[0]; | |
14954 | + d1 ^= source3[1]; | |
14955 | + d2 ^= source3[2]; | |
14956 | + d3 ^= source3[3]; | |
14957 | + d4 ^= source3[4]; | |
14958 | + d5 ^= source3[5]; | |
14959 | + d6 ^= source3[6]; | |
14960 | + d7 ^= source3[7]; | |
14961 | + destp[0] = d0; /* Store the result (in bursts) */ | |
14962 | + destp[1] = d1; | |
14963 | + destp[2] = d2; | |
14964 | + destp[3] = d3; | |
14965 | + destp[4] = d4; /* Store the result (in bursts) */ | |
14966 | + destp[5] = d5; | |
14967 | + destp[6] = d6; | |
14968 | + destp[7] = d7; | |
14969 | + source1 += 8; | |
14970 | + source2 += 8; | |
14971 | + source3 += 8; | |
14972 | + destp += 8; | |
14973 | + } | |
14974 | + break; | |
14975 | + case 5: | |
14976 | + source4 = (long *) bh_ptr[4]->b_data; | |
14977 | + source3 = (long *) bh_ptr[3]->b_data; | |
14978 | + source2 = (long *) bh_ptr[2]->b_data; | |
14979 | + source1 = (long *) bh_ptr[1]->b_data; | |
14980 | + for (i = lines; i > 0; i--) { | |
14981 | + register long d0, d1, d2, d3, d4, d5, d6, d7; | |
14982 | + d0 = destp[0]; /* Pull the stuff into registers */ | |
14983 | + d1 = destp[1]; /* ... in bursts, if possible. */ | |
14984 | + d2 = destp[2]; | |
14985 | + d3 = destp[3]; | |
14986 | + d4 = destp[4]; | |
14987 | + d5 = destp[5]; | |
14988 | + d6 = destp[6]; | |
14989 | + d7 = destp[7]; | |
14990 | + d0 ^= source1[0]; | |
14991 | + d1 ^= source1[1]; | |
14992 | + d2 ^= source1[2]; | |
14993 | + d3 ^= source1[3]; | |
14994 | + d4 ^= source1[4]; | |
14995 | + d5 ^= source1[5]; | |
14996 | + d6 ^= source1[6]; | |
14997 | + d7 ^= source1[7]; | |
14998 | + d0 ^= source2[0]; | |
14999 | + d1 ^= source2[1]; | |
15000 | + d2 ^= source2[2]; | |
15001 | + d3 ^= source2[3]; | |
15002 | + d4 ^= source2[4]; | |
15003 | + d5 ^= source2[5]; | |
15004 | + d6 ^= source2[6]; | |
15005 | + d7 ^= source2[7]; | |
15006 | + d0 ^= source3[0]; | |
15007 | + d1 ^= source3[1]; | |
15008 | + d2 ^= source3[2]; | |
15009 | + d3 ^= source3[3]; | |
15010 | + d4 ^= source3[4]; | |
15011 | + d5 ^= source3[5]; | |
15012 | + d6 ^= source3[6]; | |
15013 | + d7 ^= source3[7]; | |
15014 | + d0 ^= source4[0]; | |
15015 | + d1 ^= source4[1]; | |
15016 | + d2 ^= source4[2]; | |
15017 | + d3 ^= source4[3]; | |
15018 | + d4 ^= source4[4]; | |
15019 | + d5 ^= source4[5]; | |
15020 | + d6 ^= source4[6]; | |
15021 | + d7 ^= source4[7]; | |
15022 | + destp[0] = d0; /* Store the result (in bursts) */ | |
15023 | + destp[1] = d1; | |
15024 | + destp[2] = d2; | |
15025 | + destp[3] = d3; | |
15026 | + destp[4] = d4; /* Store the result (in bursts) */ | |
15027 | + destp[5] = d5; | |
15028 | + destp[6] = d6; | |
15029 | + destp[7] = d7; | |
15030 | + source1 += 8; | |
15031 | + source2 += 8; | |
15032 | + source3 += 8; | |
15033 | + source4 += 8; | |
15034 | + destp += 8; | |
15035 | + } | |
15036 | + break; | |
15037 | + } | |
15038 | +} | |
15039 | + | |
15040 | +/* | |
15041 | + * (the -6*32 shift factor colors the cache) | |
15042 | + */ | |
15043 | +#define SIZE (PAGE_SIZE-6*32) | |
15044 | + | |
15045 | +static void xor_speed ( struct xor_block_template * func, | |
15046 | + struct buffer_head *b1, struct buffer_head *b2) | |
15047 | +{ | |
15048 | + int speed; | |
15049 | + unsigned long now; | |
15050 | + int i, count, max; | |
15051 | + struct buffer_head *bh_ptr[6]; | |
15052 | + | |
15053 | + func->next = xor_functions; | |
15054 | + xor_functions = func; | |
15055 | + bh_ptr[0] = b1; | |
15056 | + bh_ptr[1] = b2; | |
15057 | + | |
15058 | + /* | |
15059 | + * count the number of XORs done during a whole jiffy. | |
15060 | + * calculate the speed of checksumming from this. | |
15061 | + * (we use an order-2, four-page allocation to get a guaranteed | |
15062 | + * L1-cache color layout) | |
15063 | + */ | |
15064 | + max = 0; | |
15065 | + for (i = 0; i < 5; i++) { | |
15066 | + now = jiffies; | |
15067 | + count = 0; | |
15068 | + while (jiffies == now) { | |
15069 | + mb(); | |
15070 | + func->xor_block(2,bh_ptr); | |
15071 | + mb(); | |
15072 | + count++; | |
15073 | + mb(); | |
15074 | + } | |
15075 | + if (count > max) | |
15076 | + max = count; | |
15077 | + } | |
15078 | + | |
15079 | + speed = max * (HZ*SIZE/1024); | |
15080 | + func->speed = speed; | |
15081 | + | |
15082 | + printk( " %-10s: %5d.%03d MB/sec\n", func->name, | |
15083 | + speed / 1000, speed % 1000); | |
15084 | +} | |
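Editorial note (not part of the patch): the expression speed = max * (HZ*SIZE/1024) in xor_speed() converts the best per-jiffy call count into roughly KB/s, which the printk then shows as "MB/sec". A minimal sketch of that arithmetic follows; HZ = 100, a 4096-byte page (so SIZE = 4096 - 6*32 = 3904) and max = 300 calls per jiffy are assumed, illustrative values only.

    #include <stdio.h>

    int main(void)
    {
        const int HZ = 100;                   /* assumed tick rate */
        const int SIZE = 4096 - 6 * 32;       /* 3904 bytes XOR-ed per call */
        int max = 300;                        /* hypothetical best calls in one jiffy */
        int speed = max * (HZ * SIZE / 1024); /* ~KB/s, same integer math as xor_speed() */

        /* printed the same way as the kernel's printk: "114.300 MB/sec" here */
        printf("%d.%03d MB/sec\n", speed / 1000, speed % 1000);
        return 0;
    }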
15085 | + | |
15086 | +static inline void pick_fastest_function(void) | |
15087 | +{ | |
15088 | + struct xor_block_template *f, *fastest; | |
15089 | + | |
15090 | + fastest = xor_functions; | |
15091 | + for (f = fastest; f; f = f->next) { | |
15092 | + if (f->speed > fastest->speed) | |
15093 | + fastest = f; | |
15094 | + } | |
15095 | +#ifdef CONFIG_X86_XMM | |
15096 | + if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { | |
15097 | + fastest = &t_xor_block_pIII_kni; | |
15098 | + } | |
15099 | +#endif | |
15100 | + xor_block = fastest->xor_block; | |
15101 | + printk( "using fastest function: %s (%d.%03d MB/sec)\n", fastest->name, | |
15102 | + fastest->speed / 1000, fastest->speed % 1000); | |
15103 | +} | |
15104 | + | |
15105 | + | |
15106 | +void calibrate_xor_block(void) | |
15107 | +{ | |
15108 | + struct buffer_head b1, b2; | |
15109 | + | |
15110 | + memset(&b1,0,sizeof(b1)); | |
15111 | + b2 = b1; | |
15112 | + | |
15113 | + b1.b_data = (char *) md__get_free_pages(GFP_KERNEL,2); | |
15114 | + if (!b1.b_data) { | |
15115 | + pick_fastest_function(); | |
15116 | + return; | |
15117 | + } | |
15118 | + b2.b_data = b1.b_data + 2*PAGE_SIZE + SIZE; | |
15119 | + | |
15120 | + b1.b_size = SIZE; | |
15121 | + | |
15122 | + printk(KERN_INFO "raid5: measuring checksumming speed\n"); | |
15123 | + | |
15124 | + sti(); /* should be safe */ | |
15125 | + | |
15126 | +#if defined(__sparc__) && !defined(__sparc_v9__) | |
15127 | + printk(KERN_INFO "raid5: trying high-speed SPARC checksum routine\n"); | |
15128 | + xor_speed(&t_xor_block_SPARC,&b1,&b2); | |
15129 | +#endif | |
15130 | + | |
15131 | +#ifdef CONFIG_X86_XMM | |
15132 | + if (boot_cpu_data.mmu_cr4_features & X86_CR4_OSXMMEXCPT) { | |
15133 | + printk(KERN_INFO | |
15134 | + "raid5: KNI detected, trying cache-avoiding KNI checksum routine\n"); | |
15135 | + /* We force the use of the KNI xor block because it | |
15136 | + can write around the L2 cache. We may also be able | |
15137 | + to load into the L1 only, depending on how | |
15138 | + the CPU deals with a load to a line that is | |
15139 | + being prefetched. | |
15140 | + */ | |
15141 | + xor_speed(&t_xor_block_pIII_kni,&b1,&b2); | |
15142 | + } | |
15143 | +#endif /* CONFIG_X86_XMM */ | |
15144 | + | |
15145 | +#ifdef __i386__ | |
15146 | + | |
15147 | + if (md_cpu_has_mmx()) { | |
15148 | + printk(KERN_INFO | |
15149 | + "raid5: MMX detected, trying high-speed MMX checksum routines\n"); | |
15150 | + xor_speed(&t_xor_block_pII_mmx,&b1,&b2); | |
15151 | + xor_speed(&t_xor_block_p5_mmx,&b1,&b2); | |
15152 | + } | |
15153 | + | |
15154 | +#endif /* __i386__ */ | |
15155 | + | |
15156 | + | |
15157 | + xor_speed(&t_xor_block_8regs,&b1,&b2); | |
15158 | + xor_speed(&t_xor_block_32regs,&b1,&b2); | |
15159 | + | |
15160 | + free_pages((unsigned long)b1.b_data,2); | |
15161 | + pick_fastest_function(); | |
15162 | +} | |
15163 | + | |
15164 | +#else /* __sparc_v9__ */ | |
15165 | + | |
15166 | +void calibrate_xor_block(void) | |
15167 | +{ | |
15168 | + printk(KERN_INFO "raid5: using high-speed VIS checksum routine\n"); | |
15169 | + xor_block = xor_block_VIS; | |
15170 | +} | |
15171 | + | |
15172 | +#endif /* __sparc_v9__ */ | |
15173 | + | |
15174 | +MD_EXPORT_SYMBOL(xor_block); | |
15175 | + | |
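Editorial note (not part of the patch): calibrate_xor_block() and pick_fastest_function() above implement a simple policy, time every candidate routine on the same buffers and keep the fastest one. The sketch below is a user-space analogue of that idea, not the kernel code: clock() stands in for jiffy counting, and the names xor_simple, xor_unrolled, measure and BUF_WORDS are illustrative assumptions.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    #define BUF_WORDS 4096

    typedef void (*xor_fn)(long *dest, const long *src, unsigned long words);

    static void xor_simple(long *dest, const long *src, unsigned long words)
    {
        unsigned long i;
        for (i = 0; i < words; i++)
            dest[i] ^= src[i];
    }

    static void xor_unrolled(long *dest, const long *src, unsigned long words)
    {
        unsigned long i;
        for (i = 0; i < words; i += 4) {
            dest[i + 0] ^= src[i + 0];
            dest[i + 1] ^= src[i + 1];
            dest[i + 2] ^= src[i + 2];
            dest[i + 3] ^= src[i + 3];
        }
    }

    /* Count how many calls fit into roughly a tenth of a second. */
    static int measure(xor_fn fn, long *dest, const long *src)
    {
        clock_t start = clock();
        int iterations = 0;

        while (clock() - start < CLOCKS_PER_SEC / 10) {
            fn(dest, src, BUF_WORDS);
            iterations++;
        }
        return iterations;
    }

    int main(void)
    {
        long *dest = malloc(BUF_WORDS * sizeof(long));
        long *src  = malloc(BUF_WORDS * sizeof(long));
        int s1, s2;

        if (!dest || !src)
            return 1;
        memset(dest, 0x12, BUF_WORDS * sizeof(long));
        memset(src,  0x34, BUF_WORDS * sizeof(long));

        s1 = measure(xor_simple,   dest, src);
        s2 = measure(xor_unrolled, dest, src);
        printf("using fastest function: %s\n", s2 > s1 ? "unrolled" : "simple");

        free(dest);
        free(src);
        return 0;
    }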
15176 | --- linux/arch/i386/defconfig.orig Fri Nov 23 11:17:42 2001 | |
15177 | +++ linux/arch/i386/defconfig Fri Nov 23 11:18:20 2001 | |
15178 | @@ -97,7 +97,15 @@ | |
15179 | # | |
15180 | # CONFIG_BLK_DEV_LOOP is not set | |
15181 | # CONFIG_BLK_DEV_NBD is not set | |
15182 | -# CONFIG_BLK_DEV_MD is not set | |
15183 | +CONFIG_BLK_DEV_MD=y | |
15184 | +CONFIG_AUTODETECT_RAID=y | |
15185 | +CONFIG_MD_TRANSLUCENT=y | |
15186 | +CONFIG_MD_LINEAR=y | |
15187 | +CONFIG_MD_STRIPED=y | |
15188 | +CONFIG_MD_MIRRORING=y | |
15189 | +CONFIG_MD_RAID5=y | |
15190 | +CONFIG_MD_BOOT=y | |
15191 | +CONFIG_BLK_DEV_HSM=y | |
15192 | # CONFIG_BLK_DEV_RAM is not set | |
15193 | # CONFIG_BLK_DEV_XD is not set | |
15194 | # CONFIG_BLK_DEV_DAC960 is not set | |
15195 | --- linux/arch/sparc/config.in.orig Fri Nov 23 11:17:42 2001 | |
15196 | +++ linux/arch/sparc/config.in Fri Nov 23 11:18:20 2001 | |
15197 | @@ -88,10 +88,16 @@ | |
15198 | ||
15199 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
15200 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
15201 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
15202 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
15203 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
15204 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
15205 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
15206 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
15207 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
15208 | +fi | |
15209 | +if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
15210 | + bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
15211 | fi | |
15212 | ||
15213 | tristate 'RAM disk support' CONFIG_BLK_DEV_RAM | |
15214 | --- linux/arch/sparc/defconfig.orig Fri Nov 23 11:17:42 2001 | |
15215 | +++ linux/arch/sparc/defconfig Fri Nov 23 11:18:20 2001 | |
15216 | @@ -89,10 +89,13 @@ | |
15217 | # | |
15218 | CONFIG_BLK_DEV_FD=y | |
15219 | CONFIG_BLK_DEV_MD=y | |
15220 | +# CONFIG_AUTODETECT_RAID is not set | |
15221 | CONFIG_MD_LINEAR=m | |
15222 | CONFIG_MD_STRIPED=m | |
15223 | CONFIG_MD_MIRRORING=m | |
15224 | CONFIG_MD_RAID5=m | |
15225 | +# CONFIG_MD_TRANSLUCENT is not set | |
15226 | +# CONFIG_MD_HSM is not set | |
15227 | CONFIG_BLK_DEV_RAM=y | |
15228 | CONFIG_BLK_DEV_RAM_SIZE=4096 | |
15229 | CONFIG_BLK_DEV_INITRD=y | |
15230 | --- linux/arch/sparc64/config.in.orig Fri Nov 23 11:17:42 2001 | |
15231 | +++ linux/arch/sparc64/config.in Fri Nov 23 11:18:20 2001 | |
15232 | @@ -103,10 +103,16 @@ | |
15233 | ||
15234 | bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD | |
15235 | if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then | |
15236 | + bool 'Autodetect RAID partitions' CONFIG_AUTODETECT_RAID | |
15237 | tristate ' Linear (append) mode' CONFIG_MD_LINEAR | |
15238 | tristate ' RAID-0 (striping) mode' CONFIG_MD_STRIPED | |
15239 | tristate ' RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING | |
15240 | tristate ' RAID-4/RAID-5 mode' CONFIG_MD_RAID5 | |
15241 | + tristate ' Translucent mode' CONFIG_MD_TRANSLUCENT | |
15242 | + tristate ' Hierarchical Storage Management support' CONFIG_MD_HSM | |
15243 | +fi | |
15244 | +if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_STRIPED" = "y" ]; then | |
15245 | + bool ' Boot support (linear, striped)' CONFIG_MD_BOOT | |
15246 | fi | |
15247 | ||
15248 | tristate 'RAM disk support' CONFIG_BLK_DEV_RAM | |
15249 | --- linux/arch/sparc64/defconfig.orig Fri Nov 23 11:17:42 2001 | |
15250 | +++ linux/arch/sparc64/defconfig Fri Nov 23 11:18:20 2001 | |
15251 | @@ -107,10 +107,13 @@ | |
15252 | # | |
15253 | CONFIG_BLK_DEV_FD=y | |
15254 | CONFIG_BLK_DEV_MD=y | |
15255 | +# CONFIG_AUTODETECT_RAID is not set | |
15256 | CONFIG_MD_LINEAR=m | |
15257 | CONFIG_MD_STRIPED=m | |
15258 | CONFIG_MD_MIRRORING=m | |
15259 | CONFIG_MD_RAID5=m | |
15260 | +# CONFIG_MD_TRANSLUCENT is not set | |
15261 | +# CONFIG_MD_HSM is not set | |
15262 | CONFIG_BLK_DEV_RAM=y | |
15263 | CONFIG_BLK_DEV_RAM_SIZE=4096 | |
15264 | CONFIG_BLK_DEV_INITRD=y | |
15265 | --- linux/Documentation/Configure.help.orig Fri Nov 23 11:17:44 2001 | |
15266 | +++ linux/Documentation/Configure.help Fri Nov 23 11:18:20 2001 | |
15267 | @@ -1054,6 +1054,13 @@ | |
15268 | ||
15269 | If unsure, say N. | |
15270 | ||
15271 | +Autodetect RAID partitions | |
15272 | +CONFIG_AUTODETECT_RAID | |
15273 | + This feature lets the kernel detect RAID partitions at boot time. | |
15274 | + An autodetect RAID partition is a normal partition whose partition | |
15275 | + type is 0xfd. Use this if you want to boot from RAID devices, or | |
15276 | + want them started automatically. | |
15277 | + | |
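Editorial note (not part of the patch): "partition type 0xfd" refers to the type byte in a classic MBR partition table entry. The sketch below only lists the four primary entries of an image or device given on the command line and flags the 0xfd ones; it is an illustration of the on-disk marker, not the kernel's autodetect code, which goes on to read the md superblock from such partitions.

    #include <stdio.h>

    int main(int argc, char **argv)
    {
        unsigned char sector[512];
        FILE *f;
        int i;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <disk-image-or-device>\n", argv[0]);
            return 1;
        }
        f = fopen(argv[1], "rb");
        if (!f || fread(sector, 1, 512, f) != 512) {
            perror("read");
            return 1;
        }
        if (sector[510] != 0x55 || sector[511] != 0xAA) {
            fprintf(stderr, "no MBR signature\n");
            return 1;
        }
        /* Partition table starts at offset 446; type byte is at offset 4 of each 16-byte entry. */
        for (i = 0; i < 4; i++) {
            unsigned char type = sector[446 + 16 * i + 4];
            printf("partition %d: type 0x%02x%s\n", i + 1, type,
                   type == 0xfd ? " (RAID autodetect)" : "");
        }
        fclose(f);
        return 0;
    }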
15278 | Linear (append) mode | |
15279 | CONFIG_MD_LINEAR | |
15280 | If you say Y here, then your multiple devices driver will be able to | |
15281 | @@ -1132,6 +1139,21 @@ | |
15282 | Documentation/modules.txt. | |
15283 | ||
15284 | If unsure, say Y. | |
15285 | + | |
15286 | +Translucent Block Device Support (EXPERIMENTAL) | |
15287 | +CONFIG_MD_TRANSLUCENT | |
15288 | + DO NOT USE THIS STUFF YET! | |
15289 | + | |
15290 | + Currently there is only a placeholder here, as the implementation | |
15291 | + is not yet usable. | |
15292 | + | |
15293 | +Hierarchical Storage Management support (EXPERIMENTAL) | |
15294 | +CONFIG_MD_HSM | |
15295 | + DO NOT USE THIS STUFF YET! | |
15296 | + | |
15297 | + I have released this so people can comment on the architecture, | |
15298 | + but the user-space tools are still unusable, so there is not much | |
15299 | + you can do with this yet. | |
15300 | ||
15301 | Boot support (linear, striped) | |
15302 | CONFIG_MD_BOOT |