]>
Commit | Line | Data |
---|---|---|
30dc7178 | 1 | diff -Naur linux-2002-09-30/drivers/evms/AIXlvm_vge.c evms-2002-09-30/drivers/evms/AIXlvm_vge.c |
2 | --- linux-2002-09-30/drivers/evms/AIXlvm_vge.c Wed Dec 31 18:00:00 1969 | |
3 | +++ evms-2002-09-30/drivers/evms/AIXlvm_vge.c Fri Sep 27 14:55:45 2002 | |
4 | @@ -0,0 +1,3681 @@ | |
5 | +/* -*- linux-c -*- */ | |
6 | + | |
7 | +/* | |
8 | + * | |
9 | + * | |
10 | + * Copyright (c) International Business Machines Corp., 2000 | |
11 | + * | |
12 | + * This program is free software; you can redistribute it and/or modify | |
13 | + * it under the terms of the GNU General Public License as published by | |
14 | + * the Free Software Foundation; either version 2 of the License, or | |
15 | + * (at your option) any later version. | |
16 | + * | |
17 | + * This program is distributed in the hope that it will be useful, | |
18 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
20 | + * the GNU General Public License for more details. | |
21 | + * | |
22 | + * You should have received a copy of the GNU General Public License | |
23 | + * along with this program; if not, write to the Free Software | |
24 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
25 | + * | |
26 | + * | |
27 | + */ | |
28 | +/* | |
29 | + * linux/drivers/evms/AIXlvm_vge.c | |
30 | + * | |
31 | + * EVMS AIX LVM Volume Group Emulator | |
32 | + * | |
33 | + * | |
34 | + */ | |
35 | + | |
/* Compile-time debug switches; both are tested with #ifdef in this file. */
#define EVMS_DEBUG			1
#define EVMS_AIX_DEBUG			1

/*
 * Required common services levels for the AIX kernel plugin.
 * These must be incremented if new function is added to common
 * services and the AIX kernel plugin uses the new function.
 */
#define AIX_COMMON_SERVICES_MAJOR	0
#define AIX_COMMON_SERVICES_MINOR	5
#define AIX_COMMON_SERVICES_PATCHLEVEL	0

/* Deltas handed to evms_cs_volume_request_in_progress(). */
#define AIX_INCREMENT_REQUEST		1
#define AIX_DECREMENT_REQUEST		-1

/* Mirror resync parameters and completion states. */
#define AIX_RESYNC_BLOCKSIZE		512
#define AIX_SYNC_INCOMPLETE		0x01
#define AIX_SYNC_COMPLETE		0x00

/* Index of each mirror copy (presumably master first -- confirm at use sites). */
#define AIX_MASTER			0
#define AIX_SLAVE_1			1
#define AIX_SLAVE_2			2
51 | +#include <linux/module.h> | |
52 | +#include <linux/kernel.h> | |
53 | +#include <linux/config.h> | |
54 | + | |
55 | +#include <linux/genhd.h> | |
56 | +#include <linux/string.h> | |
57 | +#include <linux/blk.h> | |
58 | +#include <linux/init.h> | |
59 | +#include <linux/slab.h> | |
60 | + | |
61 | +#include <linux/evms/evms.h> | |
62 | +#include <linux/evms/evms_aix.h> | |
63 | +#include <asm/system.h> | |
64 | +#include <asm/uaccess.h> | |
65 | + | |
66 | +#include <linux/sched.h> | |
67 | +#include <linux/smp_lock.h> | |
68 | +#include <linux/locks.h> | |
69 | +#include <linux/delay.h> | |
70 | +#include <linux/reboot.h> | |
71 | +#include <linux/completion.h> | |
72 | +#include <linux/vmalloc.h> | |
73 | + | |
74 | +#ifdef EVMS_AIX_DEBUG | |
75 | +static int AIX_volume_group_dump(void); | |
76 | +#endif | |
77 | + | |
78 | +static struct aix_volume_group *AIXVolumeGroupList = NULL; | |
79 | +static struct evms_thread *AIX_mirror_read_retry_thread; | |
80 | +static struct evms_thread *AIX_mirror_resync_thread; | |
81 | +static struct evms_pool_mgmt *AIX_BH_list_pool = NULL; | |
82 | +static struct aix_mirror_bh *AIX_retry_list = NULL; | |
83 | +static struct aix_mirror_bh **AIX_retry_tail = NULL; | |
84 | +static spinlock_t AIX_retry_list_lock = SPIN_LOCK_UNLOCKED; | |
85 | +static spinlock_t AIX_resync_list_lock = SPIN_LOCK_UNLOCKED; | |
86 | +static spinlock_t AIX_resync_pp_lock = SPIN_LOCK_UNLOCKED; | |
87 | +static int AIXResyncInProgress = FALSE; | |
88 | +static struct aix_resync_struct *AIX_resync_list = NULL; | |
89 | + | |
90 | +// Plugin API prototypes | |
91 | + | |
92 | +static void AIXiod(void *data); | |
93 | +static void AIXresync(void *data); | |
94 | +static int discover_aix(struct evms_logical_node **evms_logical_disk_head); | |
95 | +static int discover_volume_groups(struct evms_logical_node **); | |
96 | +static int discover_logical_volumes(void); | |
97 | +static int end_discover_aix(struct evms_logical_node **evms_logical_disk_head); | |
98 | +static void read_aix(struct evms_logical_node *node, struct buffer_head *bh); | |
99 | +static void write_aix(struct evms_logical_node *node, struct buffer_head *bh); | |
100 | +static int ioctl_aix(struct evms_logical_node *logical_node, | |
101 | + struct inode *inode, | |
102 | + struct file *file, unsigned int cmd, unsigned long arg); | |
103 | + | |
104 | +static int aix_direct_ioctl(struct inode *inode, | |
105 | + struct file *file, | |
106 | + unsigned int cmd, unsigned long args); | |
107 | + | |
108 | +static int AIX_remap_sector(struct evms_logical_node *node, u64 org_sector, // logical sector to remap | |
109 | + u64 size, // size (in sectors) of request to remap | |
110 | + u64 * new_sector, // remapped sector | |
111 | + u64 * new_size, // new size (in sectors) | |
112 | + struct partition_list_entry **partition, // new node for which new_sector is relative | |
113 | + u32 * le, u32 * offset_in_le); | |
114 | + | |
115 | +static int validate_build_volume_group_disk_info(struct evms_logical_node | |
116 | + *logical_node, | |
117 | + struct AIXlvm_rec *AIXlvm); | |
118 | + | |
119 | +static int add_VG_data_to_VG_list(struct evms_logical_node *logical_node, | |
120 | + struct aix_volume_group *new_group, | |
121 | + short int pvNum); | |
122 | +static int add_PV_to_volume_group(struct aix_volume_group *group, | |
123 | + struct evms_logical_node *evms_partition, | |
124 | + int pvNum); | |
125 | +static struct aix_volume_group *AIX_create_volume_group(struct evms_logical_node | |
126 | + *logical_node, | |
127 | + struct AIXlvm_rec | |
128 | + *AIXlvm); | |
129 | + | |
130 | +static int AIX_update_volume_group(struct aix_volume_group *AIXVGLptr, | |
131 | + struct evms_logical_node *logical_node, | |
132 | + struct AIXlvm_rec *AIXlvm); | |
133 | + | |
134 | +static int AIX_evms_cs_notify_lv_io_error(struct evms_logical_node *node); | |
135 | + | |
136 | +static int AIX_pvh_data_posn(u32 vgda_psn, u32 * pvh_posn, struct partition_list_entry *partition, u32 numpvs); | |
137 | + | |
138 | +static int AIX_resync_lv_mirrors(struct aix_logical_volume *volume, int force); | |
139 | + | |
140 | +static int AIX_copy_on_read(struct aix_logical_volume *volume, | |
141 | + struct partition_list_entry *master_part, | |
142 | + struct partition_list_entry *slave1_part, | |
143 | + struct partition_list_entry *slave2_part, | |
144 | + u64 master_offset, | |
145 | + u64 slave1_offset, | |
146 | + u64 slave2_offset, u32 pe_size, int le); | |
147 | + | |
148 | +static int export_volumes(struct evms_logical_node **evms_logical_disk_head); | |
149 | +static int lvm_cleanup(void); | |
150 | +static int AIX_copy_header_info(struct vg_header *AIXvgh, | |
151 | + struct vg_header *AIXvgh2); | |
152 | +static int build_pe_maps(struct aix_volume_group *volume_group); | |
153 | + | |
154 | +static struct aix_logical_volume *new_logical_volume(struct lv_entries | |
155 | + *AIXlvent, | |
156 | + struct aix_volume_group | |
157 | + *group, char *lv_name, | |
158 | + u32 stripesize); | |
159 | + | |
160 | +static int check_log_volume_and_pe_maps(struct aix_volume_group *group); | |
161 | +static int check_volume_groups(void); | |
162 | +static int init_io_aix(struct evms_logical_node *node, int io_flag, /* 0=read, 1=write */ | |
163 | + u64 sect_nr, /* disk LBA */ | |
164 | + u64 num_sects, /* # of sectors */ | |
165 | + void *buf_addr); /* buffer address */ | |
166 | + | |
167 | +static int delete_logical_volume(struct aix_logical_volume *volume); | |
168 | +static int delete_aix_node(struct evms_logical_node *logical_node); | |
169 | +static int deallocate_volume_group(struct aix_volume_group *group); | |
170 | + | |
171 | +static void AIX_handle_read_mirror_drives(struct buffer_head *bh, int uptodate); | |
172 | + | |
173 | +static void AIX_handle_write_mirror_drives(struct buffer_head *bh, | |
174 | + int uptodate); | |
175 | + | |
176 | +static void aix_notify_cache_ctor(void *foo, kmem_cache_t * cachep, | |
177 | + unsigned long flags); | |
178 | + | |
179 | +static void AIX_schedule_resync(struct aix_logical_volume *resync_volume, | |
180 | + int force); | |
181 | +static struct aix_logical_volume *AIX_get_volume_data(char *object_name); | |
182 | + | |
183 | +static void AIX_sync_mirrored_partitions(struct buffer_head *bh, int uptodate); | |
184 | + | |
185 | +static int AIX_get_set_mirror_offset(struct aix_mirror_bh *tmp_bh, | |
186 | + int index, int offset); | |
187 | + | |
188 | +static struct aix_mirror_bh *AIX_alloc_rbh(struct evms_logical_node *node, | |
189 | + struct buffer_head *bh, | |
190 | + u32 mirror_copies, | |
191 | + u32 le, u64 org_sector, int cmd); | |
192 | + | |
193 | +static struct aix_mirror_bh *AIX_alloc_wbh(struct evms_logical_node *node, | |
194 | + struct evms_logical_node *node2, | |
195 | + struct evms_logical_node *node3, | |
196 | + struct buffer_head *bh, | |
197 | + u32 mirror_copies, | |
198 | + u32 le, | |
199 | + u64 new_sector2, u64 new_sector3); | |
200 | + | |
201 | +static struct aix_mirror_bh *AIX_alloc_sbh(struct aix_logical_volume *volume, | |
202 | + struct partition_list_entry | |
203 | + *master_part, | |
204 | + struct partition_list_entry | |
205 | + *slave1_part, | |
206 | + struct partition_list_entry | |
207 | + *slave2_part, u64 master_offset, | |
208 | + u64 slave1_offset, u64 slave2_offset, | |
209 | + u32 pe_size); | |
210 | + | |
211 | +static void AIX_free_headers(struct vg_header *AIXvgh, | |
212 | + struct vg_header *AIXvgh2, | |
213 | + struct vg_trailer *AIXvgt, | |
214 | + struct vg_trailer *AIXvgt2); | |
215 | + | |
216 | +static int remove_group_from_list(struct aix_volume_group *group); | |
217 | + | |
218 | +//**************************************************************************************************** | |
219 | + | |
220 | +/* END of PROTOTYES*/ | |
221 | + | |
/*
 * Helper macros.  Every argument is parenthesized so that expressions
 * such as "a | b" or "x & mask" can be passed safely.  Arguments may be
 * evaluated more than once, so do not pass expressions with side effects.
 */

/* Physical partition size for a power-of-two exponent v1. */
#define GET_PHYSICAL_PART_SIZE(v1)	(1 << (v1))

/* True when both the seconds and nanoseconds fields match. */
#define COMPARE_TIMESTAMPS(t1, t2)	((t1).tv_sec == (t2).tv_sec && \
					 (t1).tv_nsec == (t2).tv_nsec)

/* True when all four words of the two unique ids match. */
#define COMPARE_UNIQUE_IDS(id1, id2)	((id1).word1 == (id2).word1 && \
					 (id1).word2 == (id2).word2 && \
					 (id1).word3 == (id2).word3 && \
					 (id1).word4 == (id2).word4)

/* True when s2 lies strictly between s1 and s1 + AIX_RESYNC_BLOCKSIZE. */
#define SECTOR_IN_RANGE(s1, s2)		(((s2) > (s1)) && \
					 ((s2) < (s1) + AIX_RESYNC_BLOCKSIZE))

#define AIX_PV_STATE_VALID		0	// Both VGDAs are valid and match.
#define AIX_PV_STATE_FIRST_VGDA		1	// Only the first VGDA is valid.
#define AIX_PV_STATE_SECOND_VGDA	2	// Only the second VGDA is valid.
#define AIX_PV_STATE_EITHER_VGDA	(-1)	// Both VGDAs are valid, but do not match each other.
#define AIX_PV_STATE_INVALID		(-2)	// We're in an invalid state but there's more PVs in this group

/*
 * AIX_VOLUME_GROUP_DUMP() is used as a statement ("AIX_VOLUME_GROUP_DUMP();").
 * Both variants are wrapped in do { } while (0) so the macro remains a
 * single statement even as the body of an unbraced if/else.
 */
#ifndef EVMS_AIX_DEBUG
#define AIX_VOLUME_GROUP_DUMP()		do { } while (0)
#else
#define AIX_VOLUME_GROUP_DUMP()						\
	do {								\
		LOG_DEBUG("Called line:%d \n", __LINE__);		\
		AIX_volume_group_dump();				\
	} while (0)
#endif
246 | + | |
247 | +// Global LVM data structures | |
248 | + | |
249 | +static struct evms_plugin_fops AIXlvm_fops = { | |
250 | + .discover = discover_aix, | |
251 | + .end_discover = end_discover_aix, | |
252 | + .delete = delete_aix_node, | |
253 | + .read = read_aix, | |
254 | + .write = write_aix, | |
255 | + .init_io = init_io_aix, | |
256 | + .ioctl = ioctl_aix, | |
257 | + .direct_ioctl = aix_direct_ioctl | |
258 | +}; | |
259 | + | |
260 | +static struct evms_plugin_header plugin_header = { | |
261 | + .id = SetPluginID(IBM_OEM_ID, | |
262 | + EVMS_REGION_MANAGER, | |
263 | + EVMS_AIX_FEATURE_ID), | |
264 | + .version = { | |
265 | + .major = 1, | |
266 | + .minor = 1, | |
267 | + .patchlevel = 1}, | |
268 | + .required_services_version = { | |
269 | + .major = AIX_COMMON_SERVICES_MAJOR, | |
270 | + .minor = AIX_COMMON_SERVICES_MINOR, | |
271 | + .patchlevel = | |
272 | + AIX_COMMON_SERVICES_PATCHLEVEL}, | |
273 | + .fops = &AIXlvm_fops | |
274 | +}; | |
275 | + | |
276 | +/* | |
277 | + * Function: remap sector | |
278 | + * Common function to remap volume lba to partition lba in appropriate PE | |
279 | + */ | |
280 | +static int | |
281 | +AIX_remap_sector(struct evms_logical_node *node, u64 org_sector, // logical sector to remap | |
282 | + u64 size, // size (in sectors) of request to remap | |
283 | + u64 * new_sector, // remapped sector | |
284 | + u64 * new_size, // new size (in sectors) | |
285 | + struct partition_list_entry **partition, // new node for which new_sector is relative | |
286 | + u32 * le, u32 * offset_in_le) | |
287 | +{ | |
288 | + struct aix_logical_volume *volume; | |
289 | + | |
290 | + u32 sectors_per_stripe; | |
291 | + u32 partition_to_use; | |
292 | + u32 column; | |
293 | + u32 stripe_in_column; | |
294 | + | |
295 | + u32 org_sector32; // Until striping is 64-bit enabled. | |
296 | + | |
297 | + volume = (struct aix_logical_volume *) node->private; | |
298 | + | |
299 | +#ifdef EVMS_DEBUG | |
300 | + LOG_DEBUG("-- %s volume:%p lv:%d size:" PFU64 " Name:%s\n", | |
301 | + __FUNCTION__, volume, volume->lv_number, size, volume->name); | |
302 | + LOG_DEBUG(" node %p node_name [%s] org_sector:" PFU64 "\n", node, | |
303 | + node->name, org_sector); | |
304 | + LOG_DEBUG(" mirror_copies:%d volume->lv_size:" PFU64 "\n", | |
305 | + volume->mirror_copies, volume->lv_size); | |
306 | +#endif | |
307 | + | |
308 | + org_sector32 = org_sector; | |
309 | + | |
310 | + *(new_size) = size; | |
311 | + | |
312 | + // Check if volume is striped. Reset the size if the request | |
313 | + // crosses a stripe boundary. | |
314 | + if (volume->stripes > 1) { | |
315 | +#ifdef EVMS_DEBUG | |
316 | + LOG_DEBUG(" *** STRIPED ***\n"); | |
317 | + LOG_DEBUG(" ------- volume->stripe_size:%d org_sector:%d volume_stripes:%d\n", | |
318 | + volume->stripe_size, org_sector32, volume->stripes); | |
319 | +#endif | |
320 | + | |
321 | + *(le) = org_sector >> volume->pe_size_shift; // 64-bit safe | |
322 | + *(offset_in_le) = org_sector & (volume->pe_size - 1); // 64-bit safe | |
323 | + | |
324 | +#ifdef EVMS_DEBUG | |
325 | + LOG_DEBUG("OLD - le:%d -- offset_in_le:%d \n", *(le), | |
326 | + *(offset_in_le)); | |
327 | +#endif | |
328 | + | |
329 | + sectors_per_stripe = volume->stripe_size / AIX_SECTOR_SIZE; | |
330 | + partition_to_use = | |
331 | + (org_sector32 / sectors_per_stripe) % volume->stripes; | |
332 | + stripe_in_column = | |
333 | + ((((org_sector32 / volume->stripe_size) / volume->stripes) * | |
334 | + volume->stripe_size) + | |
335 | + (org_sector32 % sectors_per_stripe)); | |
336 | + column = | |
337 | + ((org_sector32 / sectors_per_stripe) / volume->stripes) * | |
338 | + sectors_per_stripe; | |
339 | + | |
340 | +#ifdef EVMS_DEBUG | |
341 | + LOG_DEBUG("offset_in_le:%d org_sector:" PFU64 | |
342 | + " pe_shift:%d stripe_shift:%d\n", *(offset_in_le), | |
343 | + org_sector, volume->pe_size_shift, | |
344 | + volume->stripe_size_shift); | |
345 | + | |
346 | + LOG_DEBUG(" org_sector:%d sectors_per_stripe:%d partition_to_use:%d stripe_in_column:%d column:%d\n", | |
347 | + org_sector32, sectors_per_stripe, partition_to_use, | |
348 | + stripe_in_column, column); | |
349 | + LOG_DEBUG(" offset_in_le + size:" PFU64 | |
350 | + " volume->pe_size:%d volume->lv_size:" PFU64 "\n", | |
351 | + (*(offset_in_le) + size), volume->pe_size, | |
352 | + volume->lv_size); | |
353 | +#endif | |
354 | + | |
355 | + if (*(offset_in_le) + size > volume->pe_size) { | |
356 | + *new_size = volume->pe_size - *(offset_in_le); | |
357 | + LOG_DEBUG(" new_size " PFU64 "\n", *new_size); | |
358 | + } | |
359 | + | |
360 | + } | |
361 | + // Non-striped volume. Just find LE and offset. Reset the size | |
362 | + // if the request crosses an LE boundary. | |
363 | + else { | |
364 | +#ifdef EVMS_DEBUG | |
365 | + LOG_DEBUG(" *** NON-STRIPED ***\n"); | |
366 | +#endif | |
367 | + | |
368 | + *(le) = org_sector >> volume->pe_size_shift; // 64-bit safe | |
369 | + *(offset_in_le) = org_sector & (volume->pe_size - 1); // 64-bit safe | |
370 | + | |
371 | + } | |
372 | + | |
373 | +#ifdef EVMS_DEBUG | |
374 | + LOG_DEBUG(" offset_in_le:%d org_sector:" PFU64 " shift:%d\n", | |
375 | + *(offset_in_le), org_sector, volume->pe_size_shift); | |
376 | + | |
377 | + if (*(le) >= volume->num_le) { | |
378 | + LOG_DEBUG(" le Memory Overwrite !! le:%d vs volume->num_le:%d\n", | |
379 | + *(le), volume->num_le); | |
380 | + return -EINVAL; | |
381 | + } | |
382 | +#endif | |
383 | + | |
384 | + *(new_sector) = volume->le_to_pe_map[*(le)].pe_sector_offset + *(offset_in_le); | |
385 | + *(partition) = volume->le_to_pe_map[*(le)].owning_pv; | |
386 | + | |
387 | +#ifdef EVMS_DEBUG | |
388 | + LOG_DEBUG(" new_sector:" PFU64 "\n", *(new_sector)); | |
389 | + LOG_DEBUG(" Owning Part %p\n", *(partition)); | |
390 | + LOG_DEBUG(" End %s\n", __FUNCTION__); | |
391 | +#endif | |
392 | + | |
393 | + return (0); | |
394 | +} | |
395 | + | |
396 | +/* | |
397 | + * Function: read_aix | |
398 | + */ | |
399 | +static void | |
400 | +read_aix(struct evms_logical_node *node, struct buffer_head *bh) | |
401 | +{ | |
402 | + struct partition_list_entry *partition; | |
403 | + u64 org_sector; | |
404 | + u64 new_sector; | |
405 | + u64 new_size; | |
406 | + struct aix_logical_volume *volume; | |
407 | + struct aix_mirror_bh *tmp_bh; | |
408 | + u32 le, offset_in_le, count; | |
409 | + int flags = 0; | |
410 | + | |
411 | + volume = (struct aix_logical_volume *) node->private; | |
412 | +//#ifdef EVMS_DEBUG | |
413 | +// LOG_DEBUG(" ***** %s ***** bh:%p volume->iter:%d\n", __FUNCTION__, bh, | |
414 | +// volume->mirror_iterations); | |
415 | +//#endif | |
416 | + | |
417 | +#ifdef EVMS_DEBUG | |
418 | + LOG_DEBUG(" node->total_vsectors:" PFU64 "\n", node->total_vsectors); | |
419 | + LOG_DEBUG(" rsector:%lu rsize:%u node_flags:%u\n", bh->b_rsector, | |
420 | + bh->b_size, node->flags); | |
421 | +#endif | |
422 | + | |
423 | + // Check if I/O goes past end of logical volume. | |
424 | + if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) > | |
425 | + node->total_vsectors) { | |
426 | + LOG_CRITICAL(" read_aix ERROR %d\n", __LINE__); | |
427 | + buffer_IO_error(bh); | |
428 | + return; | |
429 | + } | |
430 | + | |
431 | + // Logical-to-physical remapping. | |
432 | + if (AIX_remap_sector | |
433 | + (node, bh->b_rsector, (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT), | |
434 | + &new_sector, &new_size, &partition, &le, &offset_in_le) | |
435 | + || (!partition || !new_sector)) { | |
436 | + LOG_CRITICAL(" read_aix bh: ERROR %d\n", __LINE__); | |
437 | + buffer_IO_error(bh); | |
438 | + return; | |
439 | + } | |
440 | + | |
441 | + org_sector = bh->b_rsector; | |
442 | + bh->b_rsector = new_sector; | |
443 | + //bh->b_size = new_size; | |
444 | + | |
445 | +#ifdef EVMS_DEBUG | |
446 | + LOG_DEBUG(" read_aix Mirror_Copies:%d\n", volume->mirror_copies); | |
447 | +#endif | |
448 | + | |
449 | + if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) { | |
450 | + | |
451 | + tmp_bh = | |
452 | + AIX_alloc_rbh(node, bh, 1, le, new_sector, AIX_LV_READ); | |
453 | + | |
454 | + if (!tmp_bh) { | |
455 | + buffer_IO_error(bh); | |
456 | + return; | |
457 | + } | |
458 | + | |
459 | + if (volume->le_to_pe_map_mir1) { | |
460 | + tmp_bh->mir_node1 = | |
461 | + volume->le_to_pe_map_mir1[le].owning_pv-> | |
462 | + logical_node; | |
463 | + tmp_bh->mir_sector1 = | |
464 | + volume->le_to_pe_map_mir1[le].pe_sector_offset + | |
465 | + offset_in_le; | |
466 | + } | |
467 | + | |
468 | + if (volume->mirror_copies == AIX_MAX_MIRRORS) { | |
469 | + tmp_bh->mir_node2 = | |
470 | + volume->le_to_pe_map_mir2[le].owning_pv-> | |
471 | + logical_node; | |
472 | + tmp_bh->mir_sector2 = | |
473 | + volume->le_to_pe_map_mir2[le].pe_sector_offset + | |
474 | + offset_in_le; | |
475 | + } | |
476 | + | |
477 | + if (evms_cs_volume_request_in_progress | |
478 | + (tmp_bh->bh_req.b_rdev, AIX_INCREMENT_REQUEST, &count)) { | |
479 | + buffer_IO_error(bh); | |
480 | + return; | |
481 | + } | |
482 | + | |
483 | + if (AIXResyncInProgress) { | |
484 | + if (SECTOR_IN_RANGE | |
485 | + (tmp_bh->bh_req.b_rsector, | |
486 | + AIX_resync_list->master_offset)) { | |
487 | + spin_lock_irqsave(&AIX_resync_list_lock, flags); | |
488 | + } | |
489 | + } | |
490 | + | |
491 | + R_IO(partition->logical_node, &tmp_bh->bh_req); | |
492 | + | |
493 | + if (AIXResyncInProgress) { | |
494 | + if (SECTOR_IN_RANGE | |
495 | + (tmp_bh->bh_req.b_rsector, | |
496 | + AIX_resync_list->master_offset)) { | |
497 | + spin_unlock_irqrestore(&AIX_resync_list_lock, | |
498 | + flags); | |
499 | + } | |
500 | + } | |
501 | + | |
502 | + } else { | |
503 | + | |
504 | + R_IO(partition->logical_node, bh); | |
505 | + } | |
506 | + | |
507 | +#ifdef EVMS_DEBUG | |
508 | + LOG_DEBUG(" ***** %s ***** returning\n", __FUNCTION__); | |
509 | +#endif | |
510 | + return; | |
511 | +} | |
512 | + | |
513 | +/* | |
514 | + * Function: write_aix | |
515 | + */ | |
516 | +static void | |
517 | +write_aix(struct evms_logical_node *node, struct buffer_head *bh) | |
518 | +{ | |
519 | + struct partition_list_entry *partition; | |
520 | + u64 new_sector, new_sector2 = 0, new_sector3 = 0; | |
521 | + u64 org_sector; | |
522 | + u64 new_size; | |
523 | + struct aix_logical_volume *volume; | |
524 | + struct aix_mirror_bh *tmp_bh; | |
525 | + struct evms_logical_node *node2 = NULL, *node3 = NULL; | |
526 | + u32 le, offset_in_le, count; | |
527 | + int flags = 0; | |
528 | + | |
529 | + volume = (struct aix_logical_volume *) node->private; | |
530 | + | |
531 | +#ifdef EVMS_DEBUG | |
532 | +// LOG_DEBUG(" ***** %s ***** bh:%p volume->iter:%d\n", __FUNCTION__, bh, | |
533 | +// volume->mirror_iterations); | |
534 | + LOG_DEBUG(" write_aix rsector:%lu rsize:%u\n", bh->b_rsector, | |
535 | + bh->b_size); | |
536 | + LOG_DEBUG(" write_aix total_sectors:" PFU64 "\n", node->total_vsectors); | |
537 | +#endif | |
538 | + | |
539 | + if (volume->lv_access & EVMS_LV_INCOMPLETE) { //No writes allowed on incomplete volumes | |
540 | + LOG_CRITICAL(" write_aix incomplete volume ERROR %d\n", | |
541 | + __LINE__); | |
542 | + buffer_IO_error(bh); | |
543 | + return; | |
544 | + } | |
545 | + | |
546 | + // Check if I/O goes past end of logical volume. | |
547 | + if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) > | |
548 | + node->total_vsectors) { | |
549 | + LOG_CRITICAL(" write_aix ERROR %d\n", __LINE__); | |
550 | + buffer_IO_error(bh); | |
551 | + return; | |
552 | + } | |
553 | + // Logical-to-Physical remapping | |
554 | + if (AIX_remap_sector | |
555 | + (node, bh->b_rsector, (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT), | |
556 | + &new_sector, &new_size, &partition, &le, &offset_in_le) | |
557 | + || (!new_sector || !partition)) { | |
558 | + LOG_CRITICAL(" write_aix ERROR %d\n", __LINE__); | |
559 | + buffer_IO_error(bh); | |
560 | + return; | |
561 | + } | |
562 | + | |
563 | + org_sector = bh->b_rsector; | |
564 | + bh->b_rsector = new_sector; | |
565 | + //bh->b_size = new_size; | |
566 | + | |
567 | +#ifdef EVMS_DEBUG | |
568 | + LOG_DEBUG(" write_aix Mirror_Copies:%d\n", volume->mirror_copies); | |
569 | +#endif | |
570 | + | |
571 | + if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) { | |
572 | + | |
573 | + if (volume->le_to_pe_map_mir1) { | |
574 | + new_sector2 = | |
575 | + volume->le_to_pe_map_mir1[le].pe_sector_offset + | |
576 | + offset_in_le; | |
577 | + node2 = | |
578 | + volume->le_to_pe_map_mir1[le].owning_pv-> | |
579 | + logical_node; | |
580 | + } | |
581 | + | |
582 | + if (volume->mirror_copies == AIX_MAX_MIRRORS) { | |
583 | + | |
584 | + new_sector3 = | |
585 | + volume->le_to_pe_map_mir2[le].pe_sector_offset + | |
586 | + offset_in_le; | |
587 | + node3 = | |
588 | + volume->le_to_pe_map_mir2[le].owning_pv-> | |
589 | + logical_node; | |
590 | + } | |
591 | + | |
592 | + tmp_bh = | |
593 | + AIX_alloc_wbh(partition->logical_node, node2, node3, bh, le, | |
594 | + volume->mirror_copies, new_sector2, | |
595 | + new_sector3); | |
596 | + | |
597 | + if (!tmp_bh) { | |
598 | + buffer_IO_error(bh); | |
599 | + return; | |
600 | + } | |
601 | + tmp_bh->node = node; | |
602 | + | |
603 | + tmp_bh = tmp_bh->mirror_bh_list; | |
604 | + | |
605 | + if (evms_cs_volume_request_in_progress | |
606 | + (tmp_bh->bh_req.b_rdev, AIX_INCREMENT_REQUEST, &count)) { | |
607 | + buffer_IO_error(bh); | |
608 | + // free memory here | |
609 | + return; | |
610 | + } | |
611 | + | |
612 | + if (AIXResyncInProgress) { | |
613 | + if (SECTOR_IN_RANGE | |
614 | + (tmp_bh->bh_req.b_rsector, | |
615 | + AIX_resync_list->master_offset)) { | |
616 | + spin_lock_irqsave(&AIX_resync_list_lock, flags); | |
617 | + } | |
618 | + } | |
619 | + | |
620 | + W_IO(tmp_bh->node, &tmp_bh->bh_req); | |
621 | + | |
622 | + if (AIXResyncInProgress) { | |
623 | + if (SECTOR_IN_RANGE | |
624 | + (tmp_bh->bh_req.b_rsector, | |
625 | + AIX_resync_list->master_offset)) { | |
626 | + spin_unlock_irqrestore(&AIX_resync_list_lock, | |
627 | + flags); | |
628 | + } | |
629 | + } | |
630 | + | |
631 | + tmp_bh = tmp_bh->next_r1; | |
632 | + | |
633 | + if (tmp_bh) { | |
634 | + W_IO(tmp_bh->node, &tmp_bh->bh_req); | |
635 | + tmp_bh = tmp_bh->next_r1; | |
636 | + } | |
637 | + | |
638 | + if (tmp_bh) { | |
639 | + W_IO(tmp_bh->node, &tmp_bh->bh_req); | |
640 | + } | |
641 | + | |
642 | + } else { | |
643 | + | |
644 | + W_IO(partition->logical_node, bh); | |
645 | + } | |
646 | + | |
647 | +#ifdef EVMS_DEBUG | |
648 | + LOG_DEBUG(" ***** %s returning *****\n", __FUNCTION__); | |
649 | +#endif | |
650 | + return; | |
651 | +} | |
652 | + | |
653 | +/* | |
654 | + * Function: ioctl_aix | |
655 | + * | |
656 | + */ | |
657 | +static int | |
658 | +ioctl_aix(struct evms_logical_node *logical_node, | |
659 | + struct inode *inode, | |
660 | + struct file *file, unsigned int cmd, unsigned long arg) | |
661 | +{ | |
662 | + struct aix_logical_volume *volume = | |
663 | + (struct aix_logical_volume *) (logical_node->private); | |
664 | + int rc = 0; | |
665 | + | |
666 | + LOG_EXTRA(" Ioctl %u\n", cmd); | |
667 | + | |
668 | + switch (cmd) { | |
669 | + | |
670 | + case HDIO_GETGEO: | |
671 | + { | |
672 | + // Fixed geomerty for all LVM volumes | |
673 | + unsigned char heads = 64; | |
674 | + unsigned char sectors = 32; | |
675 | + long start = 0; | |
676 | + struct hd_geometry *hd = (struct hd_geometry *) arg; | |
677 | + short cylinders; | |
678 | + cylinders = logical_node->total_vsectors; | |
679 | + cylinders = (cylinders / heads) / sectors; | |
680 | + | |
681 | + if (hd == NULL) { | |
682 | + return -EINVAL; | |
683 | + } | |
684 | + | |
685 | + if (copy_to_user | |
686 | + ((char *) (&hd->heads), &heads, sizeof (heads)) != 0 | |
687 | + || copy_to_user((char *) (&hd->sectors), §ors, | |
688 | + sizeof (sectors)) != 0 | |
689 | + || copy_to_user((short *) (&hd->cylinders), | |
690 | + &cylinders, sizeof (cylinders)) != 0 | |
691 | + || copy_to_user((long *) (&hd->start), &start, | |
692 | + sizeof (start)) != 0) { | |
693 | + return -EFAULT; | |
694 | + } | |
695 | + } | |
696 | + break; | |
697 | + | |
698 | + case EVMS_QUIESCE_VOLUME: | |
699 | + break; | |
700 | + | |
701 | + case EVMS_GET_DISK_LIST: | |
702 | + case EVMS_CHECK_MEDIA_CHANGE: | |
703 | + case EVMS_REVALIDATE_DISK: | |
704 | + case EVMS_OPEN_VOLUME: | |
705 | + case EVMS_CLOSE_VOLUME: | |
706 | + case EVMS_CHECK_DEVICE_STATUS: | |
707 | + { | |
708 | + // These five ioctl all need to be broadcast to all PVs. | |
709 | + struct aix_volume_group *group = volume->group; | |
710 | + struct partition_list_entry *partition; | |
711 | + for (partition = group->partition_list; partition; | |
712 | + partition = partition->next) { | |
713 | + rc |= | |
714 | + IOCTL(partition->logical_node, inode, file, | |
715 | + cmd, arg); | |
716 | + } | |
717 | + } | |
718 | + break; | |
719 | + | |
720 | + default: | |
721 | + // Currently the VGE does not send any ioctl's down to the | |
722 | + // partitions. Which partition would they go to? | |
723 | + rc = -ENOTTY; | |
724 | + } | |
725 | + | |
726 | + return rc; | |
727 | +} | |
728 | + | |
729 | +/* Function: aix_direct_ioctl | |
730 | + * | |
731 | + * This function provides a method for user-space to communicate directly | |
732 | + * with a plugin in the kernel. | |
733 | + */ | |
734 | +static int | |
735 | +aix_direct_ioctl(struct inode *inode, | |
736 | + struct file *file, unsigned int cmd, unsigned long args) | |
737 | +{ | |
738 | + struct aix_logical_volume *volume = NULL; | |
739 | + struct evms_plugin_ioctl_pkt argument; | |
740 | + int rc = 0; | |
741 | + | |
742 | + MOD_INC_USE_COUNT; | |
743 | + LOG_DEBUG(" Function:%s cmd:%d \n", __FUNCTION__, cmd); | |
744 | + | |
745 | + // Copy user's parameters to kernel space | |
746 | + if (copy_from_user | |
747 | + (&argument, (struct evms_plugin_ioctl *) args, sizeof (argument))) { | |
748 | + MOD_DEC_USE_COUNT; | |
749 | + return -EFAULT; | |
750 | + } | |
751 | + // Make sure this is supposed to be our ioctl. | |
752 | + if (argument.feature_id != plugin_header.id) { | |
753 | + MOD_DEC_USE_COUNT; | |
754 | + return -EINVAL; | |
755 | + } | |
756 | + | |
757 | + argument.feature_command = 1; | |
758 | + | |
759 | + switch (argument.feature_command) { | |
760 | + | |
761 | + case EVMS_AIX_RESYNC_MIRRORS: | |
762 | + { | |
763 | + struct aix_volume_resync_ioctl aix_lv_resync; | |
764 | + | |
765 | + if (copy_from_user | |
766 | + (&aix_lv_resync, | |
767 | + (struct aix_volume_resync_ioctl *) argument. | |
768 | + feature_ioctl_data, sizeof (aix_lv_resync))) { | |
769 | + rc = -EINVAL; | |
770 | + break; | |
771 | + } | |
772 | + | |
773 | + volume = AIX_get_volume_data(aix_lv_resync.object_name); | |
774 | + | |
775 | + if (volume) { | |
776 | + AIX_schedule_resync(volume, FALSE); | |
777 | + } else { | |
778 | + LOG_DEBUG | |
779 | + (" Function:%s object_name:%s -- no match found\n", | |
780 | + __FUNCTION__, aix_lv_resync.object_name); | |
781 | + rc = -EINVAL; | |
782 | + } | |
783 | + | |
784 | + } | |
785 | + break; | |
786 | + | |
787 | + default: | |
788 | + rc = -EINVAL; | |
789 | + break; | |
790 | + } | |
791 | + | |
792 | + argument.status = rc; | |
793 | + copy_to_user((struct evms_plugin_ioctl *) args, &argument, | |
794 | + sizeof (argument)); | |
795 | + MOD_DEC_USE_COUNT; | |
796 | + return rc; | |
797 | +} | |
798 | + | |
799 | +/* Function: AIX_get_volume_data | |
800 | + * | |
801 | + * Look up a logical volume by name in every discovered volume group. | |
802 | + * Returns the volume, or NULL when the name is empty or nothing matches. | |
803 | + */ | |
804 | +static struct aix_logical_volume * | |
805 | +AIX_get_volume_data(char *object_name) | |
806 | +{ | |
807 | +	struct aix_volume_group *VG_ptr; | |
808 | +	struct aix_logical_volume *volume = NULL; | |
809 | +	int i; | |
810 | + | |
811 | +	LOG_DEBUG(" Function:%s object_name:%s \n", __FUNCTION__, object_name); | |
812 | + | |
813 | +	if (!object_name || !strlen(object_name)) { | |
814 | +		return NULL; | |
815 | +	} | |
816 | +	for (VG_ptr = AIXVolumeGroupList; VG_ptr && !volume; VG_ptr = VG_ptr->next) { | |
817 | +		if (!VG_ptr->volume_list) { | |
818 | +			continue;	/* this group has no LV table to search */ | |
819 | +		} | |
820 | +		/* LV slots can be sparse: scan the whole table, not just up to the first NULL. */ | |
821 | +		for (i = 0; i < LVM_MAXLVS; i++) { | |
822 | +			if (VG_ptr->volume_list[i] && | |
823 | +			    !strcmp(VG_ptr->volume_list[i]->name, object_name)) { | |
824 | +				LOG_DEBUG(" Function:%s FOUND!! volume_name:%s \n", | |
825 | +					  __FUNCTION__, VG_ptr->volume_list[i]->name); | |
826 | +				volume = VG_ptr->volume_list[i]; | |
827 | +				break; | |
828 | +			} | |
829 | +		} | |
830 | +	} | |
831 | +	if (!volume) { | |
832 | +		LOG_DEBUG(" Function:%s object_name:%s NOT FOUND !! volume:%p \n", | |
833 | +			  __FUNCTION__, object_name, volume); | |
834 | +	} | |
835 | + | |
836 | +	return volume; | |
837 | +} | |
838 | + | |
839 | +/* | |
840 | + * Function: init_io_aix | |
841 | + * | |
842 | + * Read (io_flag 0) or write (io_flag 1) num_sects sectors at LBA sect_nr of | |
843 | + * an AIX volume. Returns 0, -EIO on a bad remap, or the first INIT_IO error. | |
844 | + */ | |
845 | +static int | |
846 | +init_io_aix(struct evms_logical_node *node, int io_flag, u64 sect_nr, | |
847 | +	    u64 num_sects, void *buf_addr) | |
848 | +{ | |
849 | +	struct partition_list_entry *partition; | |
850 | +	u64 new_sector = 0; | |
851 | +	u64 new_size = 0; | |
852 | +	int rc = 0; | |
853 | +	u32 le, offset; | |
854 | + | |
855 | +	LOG_DEBUG(" ************ init_io_aix() num_sects:" PFU64 | |
856 | +		  " node:%p sect_nr:" PFU64 "\n", num_sects, node, sect_nr); | |
857 | +	// num_sects is unbounded, so a request may span PEs or stripes: | |
858 | +	// remap and issue it one piece at a time until nothing is left. | |
859 | +	while (num_sects > 0) { | |
860 | +		if (AIX_remap_sector(node, sect_nr, num_sects, &new_sector, &new_size, | |
861 | +				     &partition, &le, &offset) || (!new_sector || !partition)) { | |
862 | +			LOG_CRITICAL("--- Error returned from AIX_remap_sector %d\n", | |
863 | +				     __LINE__); | |
864 | +			return -EIO; | |
865 | +		} | |
866 | +		// A zero-length or oversized piece would keep this loop from ending. | |
867 | +		if (!new_size || new_size > num_sects) { | |
868 | +			return -EIO; | |
869 | +		} | |
870 | +		LOG_DEBUG(" init_io_aix() line:%d logical_node:%p io_flag:%d new_sector:" | |
871 | +			  PFU64 " new_size:" PFU64 "\n", __LINE__, | |
872 | +			  partition->logical_node, io_flag, new_sector, new_size); | |
873 | +		rc = INIT_IO(partition->logical_node, io_flag, new_sector, | |
874 | +			     new_size, buf_addr); | |
875 | +		if (rc) { | |
876 | +			break;	// report the first failed piece instead of masking it | |
877 | +		} | |
878 | +		num_sects -= new_size; | |
879 | +		sect_nr += new_size; | |
880 | +		buf_addr = (void *) (((unsigned long) buf_addr) + | |
881 | +				     (unsigned long) (new_size << EVMS_VSECTOR_SIZE_SHIFT)); | |
882 | +	} | |
883 | + | |
884 | +	return rc; | |
885 | +} | |
886 | + | |
887 | +/* | |
888 | + * Function: AIXlvm_vge_init | |
889 | + * Bumps the module use count, then registers plugin_header with EVMS. | |
890 | + */ | |
891 | +int __init | |
892 | +AIXlvm_vge_init(void) | |
893 | +{ | |
894 | +	int rc; | |
895 | + | |
896 | +	LOG_DEBUG(" %s --------\n", __FUNCTION__); | |
897 | +	MOD_INC_USE_COUNT; | |
898 | +	rc = evms_cs_register_plugin(&plugin_header);	/* register with EVMS */ | |
899 | +	return rc; | |
900 | +} | |
901 | +module_init(AIXlvm_vge_init); | |
902 | + | |
903 | +/********** Required Plugin Functions **********/ | |
904 | + | |
905 | +/* | |
906 | + * Function: discover_aix | |
907 | + * | |
908 | + * Entry point into the AIX LVM discovery process. Finds volume groups on | |
909 | + * the disk list and, if that worked and at least one group is known, | |
910 | + * discovers their logical volumes and exports them. | |
911 | + * | |
912 | + * Returns the count from export_volumes (0 if that step was not reached); | |
913 | + * errors from the discovery steps are logged but not returned. | |
914 | + */ | |
915 | +static int | |
916 | +discover_aix(struct evms_logical_node **evms_logical_disk_head) | |
917 | +{ | |
918 | +	int status; | |
919 | +	int exported = 0; | |
920 | + | |
921 | +	MOD_INC_USE_COUNT; | |
922 | + | |
923 | +	LOG_DEBUG("[%s] discover_volume_groups\n", __FUNCTION__); | |
924 | +	status = discover_volume_groups(evms_logical_disk_head); | |
925 | + | |
926 | +	if (status != 0) { | |
927 | +		LOG_ERROR("[%s] discover_volume_groups rc=%d\n", __FUNCTION__,status); | |
928 | +	} else if (AIXVolumeGroupList != NULL) { | |
929 | +		LOG_DEBUG("[%s] discover_logical_volumes\n", __FUNCTION__); | |
930 | +		status = discover_logical_volumes(); | |
931 | +		if (status != 0) { | |
932 | +			LOG_ERROR("[%s] discover_logical_volumes rc=%d\n", | |
933 | +				  __FUNCTION__, status); | |
934 | +		} | |
935 | + | |
936 | +		/* Export runs even if logical volume discovery reported an error. */ | |
937 | +		LOG_DEBUG("[%s] export_volumes\n", __FUNCTION__); | |
938 | +		exported = export_volumes(evms_logical_disk_head); | |
939 | +		LOG_DEBUG("[%s] export_volumes count=%d\n", __FUNCTION__, | |
940 | +			  exported); | |
941 | +	} | |
942 | + | |
943 | +	MOD_DEC_USE_COUNT; | |
944 | +	return exported; | |
945 | +} | |
946 | + | |
947 | +/* | |
948 | + * Function: discover_volume_groups | |
949 | + * | |
950 | + * Scan the EVMS disk list for AIX physical volumes. A node whose IPL and LVM | |
951 | + * records carry the AIX ids and whose group info validates joins its volume | |
952 | + * group and leaves the list. Returns 0, -ENOMEM, or -EINVAL (group check failed). | |
953 | + */ | |
954 | +static int | |
955 | +discover_volume_groups(struct evms_logical_node **evms_logical_disk_head) | |
956 | +{ | |
957 | +	struct evms_logical_node *logical_node; | |
958 | +	struct evms_logical_node *next_node; | |
959 | +	struct aix_ipl_rec_area *AIXpv; | |
960 | +	struct AIXlvm_rec *AIXlvm;	// Temp holder for the LVM on disk rec | |
961 | +	int rc = 0; | |
962 | + | |
963 | +	LOG_DEBUG(" Begin %s\n", __FUNCTION__); | |
964 | + | |
965 | +	AIXpv = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
966 | +	if (!AIXpv) { | |
967 | +		return -ENOMEM; | |
968 | +	} | |
969 | + | |
970 | +	AIXlvm = kmalloc(sizeof (struct AIXlvm_rec), GFP_KERNEL); | |
971 | +	if (!AIXlvm) { | |
972 | +		kfree(AIXpv); | |
973 | +		return -ENOMEM; | |
974 | +	} | |
975 | + | |
976 | +	for (logical_node = *evms_logical_disk_head; logical_node; | |
977 | +	     logical_node = next_node) { | |
978 | +		// Grab the next list item in case we remove this partition from the global list. | |
979 | +		next_node = logical_node->next; | |
980 | + | |
981 | +		// Read the first sector and see if it has a valid AIX PV signature. | |
982 | +		if (INIT_IO(logical_node, 0, 0, 1, AIXpv)) { | |
983 | +			// On an I/O error, continue on to the next | |
984 | +			// partition. The group that this partition | |
985 | +			// belongs to will be incomplete, but we still | |
986 | +			// need to discover any other groups. | |
987 | +			LOG_ERROR(" Error reading PV [%p]\n", logical_node); | |
988 | +			continue; | |
989 | +		} | |
990 | + | |
991 | +		if (AIXpv->IPL_record_id == IPLRECID) { | |
992 | +			// This partition is definitely a PV, | |
993 | +			// but is it part of a valid VG? | |
994 | +			LOG_DEBUG(" DVG removing node from list logical_node %p\n", | |
995 | +				  logical_node); | |
996 | + | |
997 | +			if (INIT_IO(logical_node, 0, PSN_LVM_REC, 1, AIXlvm)) { | |
998 | +				LOG_ERROR(" Error reading PV [%p]\n", logical_node); | |
999 | +				continue; | |
1000 | +			} | |
1001 | + | |
1002 | +			if (AIXlvm->lvm_id == AIX_LVM_LVMID) { | |
1003 | +				if (validate_build_volume_group_disk_info( | |
1004 | +					logical_node, AIXlvm)) { | |
1005 | +					// Again, continue on and we'll | |
1006 | +					// clean up later. | |
1007 | +					continue; | |
1008 | +				} | |
1009 | + | |
1010 | +				evms_cs_remove_logical_node_from_list( | |
1011 | +					evms_logical_disk_head, logical_node); | |
1012 | +			} else { | |
1013 | +				LOG_DEBUG(" Found an AIX PV with no parent LVM (LVM ID: %d)\n", | |
1014 | +					  AIXlvm->lvm_id); | |
1015 | +				continue; | |
1016 | +			} | |
1017 | +		} else { | |
1018 | +			LOG_DEBUG(" Found a PV not belonging to AIX [%p]\n", | |
1019 | +				  logical_node); | |
1020 | +		} | |
1021 | +	} | |
1022 | + | |
1023 | +	AIX_VOLUME_GROUP_DUMP(); | |
1024 | + | |
1025 | +	if (check_volume_groups()) { | |
1026 | +		rc = -EINVAL; | |
1027 | +	} | |
1028 | + | |
1029 | +	// Both scratch buffers are released on the error path as well. | |
1030 | +	kfree(AIXpv); | |
1031 | +	kfree(AIXlvm); | |
1032 | +	return rc; | |
1033 | +} | |
1034 | + | |
1035 | +/* | |
1036 | + * Function: validate_build_volume_group_disk_info | |
1037 | + * | |
1038 | + * Finds the group on AIXVolumeGroupList whose vg_id matches this PV's LVM | |
1039 | + * record (creating and linking it if absent, updating it if present) and | |
1040 | + * adds the PV to it. Returns 0 on success, -EINVAL otherwise. | |
1041 | + */ | |
1042 | +static int | |
1043 | +validate_build_volume_group_disk_info(struct evms_logical_node *logical_node, | |
1044 | +				      struct AIXlvm_rec *AIXlvm) | |
1045 | +{ | |
1046 | +	struct aix_volume_group *AIXVGLptr = AIXVolumeGroupList; | |
1047 | + | |
1048 | +	LOG_DEBUG(" VBVGDI pv_num:%d\n", AIXlvm->pv_num); | |
1049 | + | |
1050 | +	while (AIXVGLptr) { | |
1051 | +		if (COMPARE_UNIQUE_IDS(AIXlvm->vg_id, AIXVGLptr->vg_id)) { | |
1052 | +			break; | |
1053 | +		} | |
1054 | +		AIXVGLptr = AIXVGLptr->next;	// There is more than one so walk the list | |
1055 | +	} | |
1056 | + | |
1057 | +	if (!AIXVGLptr) { | |
1058 | +		LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr,__LINE__); | |
1059 | +		AIXVGLptr = AIX_create_volume_group(logical_node, AIXlvm); | |
1060 | +		if (AIXVGLptr) { | |
1061 | +			AIXVGLptr->next = AIXVolumeGroupList; | |
1062 | +			AIXVolumeGroupList = AIXVGLptr; | |
1063 | +		} | |
1064 | +	} else { | |
1065 | +		LOG_DEBUG(" VBVGDI Rediscover AIXVGLptr:%p line:%d\n", | |
1066 | +			  AIXVGLptr, __LINE__); | |
1067 | + | |
1068 | +		if (AIX_update_volume_group(AIXVGLptr, logical_node, AIXlvm)) { | |
1069 | +			LOG_DEBUG | |
1070 | +			    (" VBVGDI ERROR on Rediscover AIXVGLptr:%p line:%d\n", | |
1071 | +			     AIXVGLptr, __LINE__); | |
1072 | +		} | |
1073 | +	} | |
1074 | + | |
1075 | +	if (!AIXVGLptr) { | |
1076 | +		// No group was found or created. AIXVGLptr is NULL here, so only | |
1077 | +		// the pointer value may be logged, never one of its fields. | |
1078 | +		LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr, | |
1079 | +			  __LINE__); | |
1080 | +		LOG_CRITICAL("Unable to allocate volume group data struct Volume Group Corruption !!\n"); | |
1081 | +		return -EINVAL; | |
1082 | +	} else { | |
1083 | + | |
1084 | +		LOG_DEBUG(" VBVGDI AIXVolumeGroupList:%p line:%d\n", | |
1085 | +			  AIXVolumeGroupList, __LINE__); | |
1086 | +		LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr, | |
1087 | +			  __LINE__); | |
1088 | +		LOG_DEBUG(" VBVGDI flags:%d\n", AIXVGLptr->flags); | |
1089 | + | |
1090 | +		if (add_PV_to_volume_group(AIXVGLptr, logical_node, AIXlvm->pv_num)) { | |
1091 | +			return -EINVAL; | |
1092 | +		} | |
1093 | +	} | |
1094 | + | |
1095 | +	return 0; | |
1096 | +} | |
1097 | + | |
1098 | +/* | |
1099 | + * Function: add_VG_data_to_VG_list | |
1100 | + * | |
1101 | + * Fill in the in-memory bookkeeping of a volume group from the VG header | |
1102 | + * held in new_group->AIXvgh. | |
1103 | + * | |
1104 | + * logical_node: not referenced by the active code. | |
1105 | + * new_group:    group to set up; new_group->AIXvgh is dereferenced | |
1106 | + *               unconditionally. | |
1107 | + * pvNum:        only printed in the entry trace. | |
1108 | + * | |
1109 | + * Returns 0, or -ENOMEM when the LV pointer table cannot be allocated. | |
1110 | + */ | |
1111 | +static int | |
1112 | +add_VG_data_to_VG_list(struct evms_logical_node *logical_node, | |
1113 | +		       struct aix_volume_group *new_group, short int pvNum) | |
1114 | +{ | |
1115 | +	const size_t lv_table_size = | |
1116 | +		LVM_MAXLVS * sizeof (struct aix_logical_volume *); | |
1117 | + | |
1118 | +	LOG_DEBUG(" AVGDVGL Entering pvNum:%d vgda_PSN:%d\n", pvNum, | |
1119 | +		  new_group->vgda_psn); | |
1120 | + | |
1121 | +	// The array of pointers to the logical volumes is always sized for the | |
1122 | +	// maximum permitted: LV numbers need not be sequential (1,2,...,8,11,15, | |
1123 | +	// 21,33 ...), so the array can have gaps even with only 12 LVs. | |
1124 | +	if (new_group->volume_list == NULL) { | |
1125 | +		new_group->volume_list = kmalloc(lv_table_size, GFP_KERNEL); | |
1126 | +		if (new_group->volume_list == NULL) { | |
1127 | +			return -ENOMEM; | |
1128 | +		} | |
1129 | +		memset(new_group->volume_list, 0, lv_table_size); | |
1130 | +	} | |
1131 | + | |
1132 | +	// Copy the four words of the group id out of the header. | |
1133 | +	new_group->vg_id.word1 = new_group->AIXvgh->vg_id.word1; | |
1134 | +	new_group->vg_id.word2 = new_group->AIXvgh->vg_id.word2; | |
1135 | +	new_group->vg_id.word3 = new_group->AIXvgh->vg_id.word3; | |
1136 | +	new_group->vg_id.word4 = new_group->AIXvgh->vg_id.word4; | |
1137 | + | |
1138 | +	// pp_size goes through GET_PHYSICAL_PART_SIZE and is divided by the sector size. | |
1139 | +	new_group->pe_size = | |
1140 | +		GET_PHYSICAL_PART_SIZE(new_group->AIXvgh->pp_size) / AIX_SECTOR_SIZE; | |
1141 | + | |
1142 | +	new_group->flags |= AIX_VG_DIRTY; | |
1143 | + | |
1144 | +	LOG_DEBUG(" AVGDVGL Vol Group ID %x\n", new_group->vg_id.word2); | |
1145 | +	return 0; | |
1146 | +} | |
1169 | + | |
1170 | +/* | |
1171 | + * Function: add_PV_to_volume_group | |
1172 | + * | |
1173 | + * Mark the group AIX_VG_DIRTY and, unless the node is already on its | |
1174 | + * partition list, push a new zeroed partition_list_entry for PV pvNum on | |
1175 | + * the front of that list, raising the group's sector/block sizes to the | |
1176 | + * node's if larger. Returns 0 (also when already present) or -ENOMEM. | |
1177 | + */ | |
1178 | +static int | |
1179 | +add_PV_to_volume_group(struct aix_volume_group *group, | |
1180 | +		       struct evms_logical_node *evms_partition, int pvNum) | |
1181 | +{ | |
1182 | +	struct partition_list_entry *entry; | |
1183 | + | |
1184 | +	LOG_DEBUG(" APVVG Entering pvNum:%d\n", pvNum); | |
1185 | + | |
1186 | +	group->flags |= AIX_VG_DIRTY; | |
1187 | + | |
1188 | +	// Nothing more to do when this node is already a member. | |
1189 | +	entry = group->partition_list; | |
1190 | +	while (entry != NULL) { | |
1191 | +		if (entry->logical_node == evms_partition) { | |
1192 | +			return 0; | |
1193 | +		} | |
1194 | +		entry = entry->next; | |
1195 | +	} | |
1196 | + | |
1197 | +	entry = kmalloc(sizeof (*entry), GFP_KERNEL); | |
1198 | +	if (entry == NULL) { | |
1199 | +		return -ENOMEM; | |
1200 | +	} | |
1201 | + | |
1202 | +	memset(entry, 0, sizeof (*entry)); | |
1203 | + | |
1204 | +	entry->logical_node = evms_partition; | |
1205 | +	entry->pv_number = pvNum; | |
1206 | +	entry->next = group->partition_list;	// becomes the new head of the list | |
1207 | + | |
1208 | +	// The group keeps the largest sector and block size seen among its members. | |
1209 | +	if (group->hard_sect_size < evms_partition->hardsector_size) { | |
1210 | +		group->hard_sect_size = evms_partition->hardsector_size; | |
1211 | +	} | |
1212 | +	if (group->block_size < evms_partition->block_size) { | |
1213 | +		group->block_size = evms_partition->block_size; | |
1214 | +	} | |
1215 | + | |
1216 | +	group->partition_list = entry; | |
1217 | +	group->partition_count++; | |
1218 | + | |
1219 | +	LOG_DEBUG(" APVVG partition_count:%d pv_num:%d\n", | |
1220 | +		  group->partition_count, pvNum); | |
1221 | + | |
1222 | +	return 0; | |
1223 | +} | |
1224 | + | |
1225 | +/**************************************************** | |
1226 | +* Function: AIX_create_volume_group | |
1227 | +* Reads both VGDA header/trailer pairs of a PV, compares their timestamps to pick | |
1228 | +* a header copy and builds a new group; NULL on I/O, memory or Big VGDA errors. | |
1229 | +*****************************************************/ | |
1230 | +static struct aix_volume_group * | |
1231 | +AIX_create_volume_group(struct evms_logical_node *logical_node, | |
1232 | +			struct AIXlvm_rec *AIXlvm) | |
1233 | +{ | |
1234 | +	struct vg_header *AIXvgh = NULL, *AIXvgh2 = NULL; | |
1235 | +	struct vg_trailer *AIXvgt = NULL, *AIXvgt2 = NULL; | |
1236 | +	struct aix_volume_group *AIXVGLptr; | |
1237 | + | |
1238 | +	AIXvgh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1239 | +	if (!AIXvgh) { | |
1240 | +		return NULL; | |
1241 | +	} | |
1242 | + | |
1243 | +	AIXvgh2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1244 | +	if (!AIXvgh2) { | |
1245 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1246 | +		return NULL; | |
1247 | +	} | |
1248 | + | |
1249 | +	AIXvgt = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1250 | +	if (!AIXvgt) { | |
1251 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1252 | +		return NULL; | |
1253 | +	} | |
1254 | + | |
1255 | +	AIXvgt2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1256 | +	if (!AIXvgt2) { | |
1257 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1258 | +		return NULL; | |
1259 | +	} | |
1260 | + | |
1261 | +	memset(AIXvgh, 0, AIX_SECTOR_SIZE); | |
1262 | +	memset(AIXvgh2, 0, AIX_SECTOR_SIZE); | |
1263 | +	memset(AIXvgt, 0, AIX_SECTOR_SIZE); | |
1264 | +	memset(AIXvgt2, 0, AIX_SECTOR_SIZE); | |
1265 | + | |
1266 | +	// First time thru we want to read this in, we may only have one PV in this group, all others | |
1267 | +	// may be corrupt, etc. If the info is clean we shouldn't get here. | |
1268 | + | |
1269 | +	if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[0], 1, AIXvgh)) { | |
1270 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1271 | +		return NULL; | |
1272 | +	} | |
1273 | + | |
1274 | +	if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[1], 1, AIXvgh2)) { | |
1275 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1276 | +		return NULL; | |
1277 | +	} | |
1278 | + | |
1279 | +	if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1), 1, | |
1280 | +		    AIXvgt)) { | |
1281 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1282 | +		return NULL; | |
1283 | +	} | |
1284 | + | |
1285 | +	if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1), 1, | |
1286 | +		    AIXvgt2)) { | |
1287 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1288 | +		return NULL; | |
1289 | +	} | |
1290 | + | |
1291 | +	LOG_DEBUG("CVG AIXvgh->vgda_psn[%d]:%d\n", 0, AIXlvm->vgda_psn[0]); | |
1292 | +	LOG_DEBUG("CVG AIXvgh->vgda_psn[%d]:%d\n", 1, AIXlvm->vgda_psn[1]); | |
1293 | +	LOG_DEBUG("CVG AIXvgt psn[%d]:%d\n", 0,(AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1)); | |
1294 | +	LOG_DEBUG("CVG AIXvgt psn[%d]:%d\n", 1,(AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1)); | |
1295 | +	LOG_DEBUG("CVG Allocating AIXVGLptr:size:%d \n",(int) sizeof (struct aix_volume_group)); | |
1296 | + | |
1297 | +	AIXVGLptr = kmalloc(sizeof (struct aix_volume_group), GFP_KERNEL); | |
1298 | +	if (!AIXVGLptr) { | |
1299 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1300 | +		return NULL; | |
1301 | +	} | |
1302 | +	memset(AIXVGLptr, 0, sizeof (struct aix_volume_group)); | |
1303 | + | |
1304 | +	AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1305 | +	AIXVGLptr->flags |= AIX_VG_DIRTY; | |
1306 | + | |
1307 | +	LOG_DEBUG("CVG AIXVGLptr:%p line %d\n", AIXVGLptr, __LINE__); | |
1308 | + | |
1309 | +	AIXVGLptr->AIXvgh = kmalloc(sizeof (struct vg_header), GFP_KERNEL); | |
1310 | +	if (!AIXVGLptr->AIXvgh) { | |
1311 | +		kfree(AIXVGLptr); | |
1312 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1313 | +		return NULL; | |
1314 | +	} | |
1315 | +	memset(AIXVGLptr->AIXvgh, 0, sizeof (struct vg_header)); | |
1316 | + | |
1317 | +	LOG_DEBUG("CVG COMP TS AIXVGLptr->CleanVGInfo:%d \n", | |
1318 | +		  AIXVGLptr->CleanVGInfo); | |
1319 | + | |
1320 | +	if (AIXVGLptr->CleanVGInfo == AIX_PV_STATE_INVALID) { | |
1321 | +		if (COMPARE_TIMESTAMPS(AIXvgh->vg_timestamp, AIXvgt->timestamp)) { | |
1322 | +			if (COMPARE_TIMESTAMPS | |
1323 | +			    (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) { | |
1324 | +				if (COMPARE_TIMESTAMPS | |
1325 | +				    (AIXvgh->vg_timestamp, | |
1326 | +				     AIXvgh2->vg_timestamp)) { | |
1327 | +					// All timestamps match. Yea! | |
1328 | +					AIXVGLptr->CleanVGInfo = | |
1329 | +					    AIX_PV_STATE_VALID; | |
1330 | +				} else { | |
1331 | +					// Both VGDAs are good, but timestamps are | |
1332 | +					// different. Can't tell yet which one is | |
1333 | +					// correct. | |
1334 | +					AIXVGLptr->CleanVGInfo = | |
1335 | +					    AIX_PV_STATE_EITHER_VGDA; | |
1336 | +				} | |
1337 | +			} else { | |
1338 | +				// First VGDA is good, second is bad. | |
1339 | +				AIXVGLptr->CleanVGInfo = | |
1340 | +				    AIX_PV_STATE_FIRST_VGDA; | |
1341 | +			} | |
1342 | +		} else { | |
1343 | +			if (COMPARE_TIMESTAMPS | |
1344 | +			    (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) { | |
1345 | +				// First VGDA is bad, second is good. | |
1346 | +				AIXVGLptr->CleanVGInfo = | |
1347 | +				    AIX_PV_STATE_SECOND_VGDA; | |
1348 | +			} else if (AIXvgh->numpvs == 1) { // We only have 1 PV in this group, mismatch or not this will have to do | |
1349 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_VALID; | |
1350 | +			} else { | |
1351 | +				// This should never happen. | |
1352 | +				LOG_DEBUG("All four VG timestamps for %d are different. What happened?!?\n", | |
1353 | +					  AIXVGLptr->vg_id.word2); | |
1354 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1355 | + | |
1356 | +			} | |
1357 | +		} | |
1358 | + | |
1359 | +		LOG_DEBUG("CVG SWITCH TS AIXVGLptr->CleanVGInfo:%d \n", | |
1360 | +			  AIXVGLptr->CleanVGInfo); | |
1361 | + | |
1362 | +		switch (AIXVGLptr->CleanVGInfo) { | |
1363 | +		case AIX_PV_STATE_VALID: | |
1364 | +		case AIX_PV_STATE_FIRST_VGDA: | |
1365 | + | |
1366 | +			LOG_DEBUG("CVG SWITCH VALID %d size:%d\n", | |
1367 | +				  AIXVGLptr->CleanVGInfo, | |
1368 | +				  (int) sizeof (struct vg_header)); | |
1369 | + | |
1370 | +			AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need | |
1371 | + | |
1372 | +			AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0]; | |
1373 | +			AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1374 | +			break; | |
1375 | + | |
1376 | +		case AIX_PV_STATE_SECOND_VGDA: | |
1377 | +			LOG_DEBUG("CVG SWITCH SECOND VGDA %d size:%d\n", | |
1378 | +				  AIXVGLptr->CleanVGInfo, | |
1379 | +				  (int) sizeof (struct vg_header)); | |
1380 | + | |
1381 | +			AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh2); // Get the info. we need | |
1382 | + | |
1383 | +			AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[1]; | |
1384 | +			AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1385 | +			break; | |
1386 | + | |
1387 | +		case AIX_PV_STATE_EITHER_VGDA: | |
1388 | +			LOG_DEBUG("CVG SWITCH EITHER VGDA %d size:%d\n", | |
1389 | +				  AIXVGLptr->CleanVGInfo,(int) sizeof (struct vg_header)); | |
1390 | +			if (COMPARE_UNIQUE_IDS(AIXvgh->vg_id, AIXvgh2->vg_id)) { | |
1391 | + | |
1392 | +				AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need | |
1393 | + | |
1394 | +				AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0]; | |
1395 | +				AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1396 | +			} else { | |
1397 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1398 | +				// Not sure where this PV belongs. It thinks it is | |
1399 | +				// supposed to be in two different containers. We will | |
1400 | +				// probably need to put this on a separate, temporary | |
1401 | +				// list, and determine later which container is missing | |
1402 | +				// a PV. | |
1403 | +			} | |
1404 | +			break; | |
1405 | + | |
1406 | +		default: | |
1407 | +			LOG_ERROR("Invalid PV state (%d) for %d\n", | |
1408 | +				  AIXVGLptr->CleanVGInfo, | |
1409 | +				  AIXVGLptr->vg_id.word2); | |
1410 | +			AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1411 | +			break; | |
1412 | +		} | |
1413 | + | |
1414 | +	} | |
1415 | + | |
1416 | +	// Currently AIX Big VGDA is not supported - cleanup and return NULL so this VG doesn't get added | |
1417 | + | |
1418 | +	if (AIXVGLptr->AIXvgh->bigvg != 0) { | |
1419 | +		LOG_SERIOUS("Error creating Volume Group AIX Big VGDA is not currently supported\n"); | |
1420 | +		kfree(AIXVGLptr->AIXvgh); | |
1421 | +		kfree(AIXVGLptr); | |
1422 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1423 | +		return NULL; | |
1424 | +	} | |
1425 | + | |
1426 | +	// add_VG_data_to_VG_list can fail to allocate the LV pointer table; a | |
1427 | +	// group without that table must not be handed back to the caller. | |
1428 | +	if (add_VG_data_to_VG_list(logical_node, AIXVGLptr, AIXlvm->pv_num)) { | |
1429 | +		LOG_SERIOUS("Error creating Volume Group, unable to allocate the volume list\n"); | |
1430 | +		kfree(AIXVGLptr->AIXvgh); | |
1431 | +		kfree(AIXVGLptr); | |
1432 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1433 | +		return NULL; | |
1434 | +	} | |
1435 | + | |
1436 | +	AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1437 | + | |
1438 | +	LOG_DEBUG("CVG Exiting CleanVGInfo:%d\n", AIXVGLptr->CleanVGInfo); | |
1439 | + | |
1440 | +	return AIXVGLptr; | |
1441 | +} | |
1442 | + | |
1443 | +/**************************************************** | |
1444 | +* Function: AIX_update_volume_group | |
1445 | +* Re-reads the VGDA headers/trailers through logical_node and refreshes the | |
1446 | +* header copy of an existing group. Returns 0, -EIO (read failed) or -ENOMEM. | |
1447 | +*****************************************************/ | |
1448 | +static int | |
1449 | +AIX_update_volume_group(struct aix_volume_group *AIXVGLptr, | |
1450 | +			struct evms_logical_node *logical_node, | |
1451 | +			struct AIXlvm_rec *AIXlvm) | |
1452 | +{ | |
1453 | +	struct vg_header *AIXvgh = NULL, *AIXvgh2 = NULL; | |
1454 | +	struct vg_trailer *AIXvgt = NULL, *AIXvgt2 = NULL; | |
1455 | + | |
1456 | +	AIXvgh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1457 | +	if (!AIXvgh) { | |
1458 | +		return -ENOMEM; | |
1459 | +	} | |
1460 | + | |
1461 | +	AIXvgh2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1462 | +	if (!AIXvgh2) { | |
1463 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1464 | +		return -ENOMEM; | |
1465 | +	} | |
1466 | + | |
1467 | +	AIXvgt = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1468 | +	if (!AIXvgt) { | |
1469 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1470 | +		return -ENOMEM; | |
1471 | +	} | |
1472 | + | |
1473 | +	AIXvgt2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
1474 | +	if (!AIXvgt2) { | |
1475 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1476 | +		return -ENOMEM; | |
1477 | +	} | |
1478 | + | |
1479 | +	// First time thru we want to read this in, we may only have one PV in this group, all others | |
1480 | +	// may be corrupt, etc. If the info is clean we shouldn't get here. | |
1481 | + | |
1482 | +	if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[0], 1, AIXvgh)) { | |
1483 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1484 | +		return -EIO; | |
1485 | +	} | |
1486 | + | |
1487 | +	if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[1], 1, AIXvgh2)) { | |
1488 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1489 | +		return -EIO; | |
1490 | +	} | |
1491 | + | |
1492 | +	if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1), 1, | |
1493 | +		    AIXvgt)) { | |
1494 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1495 | +		return -EIO; | |
1496 | +	} | |
1497 | + | |
1498 | +	if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1), 1, | |
1499 | +		    AIXvgt2)) { | |
1500 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1501 | +		return -EIO; | |
1502 | +	} | |
1503 | + | |
1504 | +	LOG_DEBUG("UVG AIXvgh->vgda_psn[%d]:%d\n", 0, AIXlvm->vgda_psn[0]); | |
1505 | +	LOG_DEBUG("UVG AIXvgh->vgda_psn[%d]:%d\n", 1, AIXlvm->vgda_psn[1]); | |
1506 | +	LOG_DEBUG("UVG AIXvgt psn[%d]:%d\n", 0,(AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1)); | |
1507 | +	LOG_DEBUG("UVG AIXvgt psn[%d]:%d\n", 1,(AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1)); | |
1508 | + | |
1509 | +	AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1510 | +	AIXVGLptr->flags |= AIX_VG_DIRTY; | |
1511 | + | |
1512 | +	LOG_DEBUG("UVG AIXVGLptr:%p line %d\n", AIXVGLptr, __LINE__); | |
1513 | +	// Reuse the group's existing header buffer; allocating a new one over it would leak the old copy. | |
1514 | +	if (!AIXVGLptr->AIXvgh && | |
1515 | +	    !(AIXVGLptr->AIXvgh = kmalloc(sizeof (struct vg_header), GFP_KERNEL))) { | |
1516 | +		AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1517 | +		return -ENOMEM; | |
1518 | +	} | |
1519 | +	memset(AIXVGLptr->AIXvgh, 0, sizeof (struct vg_header)); | |
1520 | + | |
1521 | +	LOG_DEBUG("UVG COMP TS AIXVGLptr->CleanVGInfo:%d \n",AIXVGLptr->CleanVGInfo); | |
1522 | + | |
1523 | +	if (AIXVGLptr->CleanVGInfo == AIX_PV_STATE_INVALID) { | |
1524 | +		if (COMPARE_TIMESTAMPS(AIXvgh->vg_timestamp, AIXvgt->timestamp)) { | |
1525 | +			if (COMPARE_TIMESTAMPS | |
1526 | +			    (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) { | |
1527 | +				if (COMPARE_TIMESTAMPS | |
1528 | +				    (AIXvgh->vg_timestamp, | |
1529 | +				     AIXvgh2->vg_timestamp)) { | |
1530 | +					// All timestamps match. Yea! | |
1531 | +					AIXVGLptr->CleanVGInfo = | |
1532 | +					    AIX_PV_STATE_VALID; | |
1533 | +				} else { | |
1534 | +					// Both VGDAs are good, but timestamps are | |
1535 | +					// different. Can't tell yet which one is | |
1536 | +					// correct. | |
1537 | +					AIXVGLptr->CleanVGInfo = | |
1538 | +					    AIX_PV_STATE_EITHER_VGDA; | |
1539 | +				} | |
1540 | +			} else { | |
1541 | +				// First VGDA is good, second is bad. | |
1542 | +				AIXVGLptr->CleanVGInfo = | |
1543 | +				    AIX_PV_STATE_FIRST_VGDA; | |
1544 | +			} | |
1545 | +		} else { | |
1546 | +			if (COMPARE_TIMESTAMPS | |
1547 | +			    (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) { | |
1548 | +				// First VGDA is bad, second is good. | |
1549 | +				AIXVGLptr->CleanVGInfo = | |
1550 | +				    AIX_PV_STATE_SECOND_VGDA; | |
1551 | +			} else if (AIXvgh->numpvs == 1) { // We only have 1 PV in this group, mismatch or not this will have to do | |
1552 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_VALID; | |
1553 | +			} else { | |
1554 | +				// This should never happen. | |
1555 | +				LOG_DEBUG | |
1556 | +				    ("All four VG timestamps for %d are different. What happened?!?\n", | |
1557 | +				     AIXVGLptr->vg_id.word2); | |
1558 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1559 | + | |
1560 | +			} | |
1561 | +		} | |
1562 | + | |
1563 | +		LOG_DEBUG("UVG SWITCH TS AIXVGLptr->CleanVGInfo:%d \n", | |
1564 | +			  AIXVGLptr->CleanVGInfo); | |
1565 | + | |
1566 | +		switch (AIXVGLptr->CleanVGInfo) { | |
1567 | +		case AIX_PV_STATE_VALID: | |
1568 | +		case AIX_PV_STATE_FIRST_VGDA: | |
1569 | + | |
1570 | +			LOG_DEBUG("UVG SWITCH VALID %d size:%d\n", | |
1571 | +				  AIXVGLptr->CleanVGInfo, | |
1572 | +				  (int) sizeof (struct vg_header)); | |
1573 | + | |
1574 | +			AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need | |
1575 | + | |
1576 | +			AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0]; | |
1577 | +			AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1578 | +			break; | |
1579 | + | |
1580 | +		case AIX_PV_STATE_SECOND_VGDA: | |
1581 | +			LOG_DEBUG("UVG SWITCH SECOND VGDA %d size:%d\n", | |
1582 | +				  AIXVGLptr->CleanVGInfo, | |
1583 | +				  (int) sizeof (struct vg_header)); | |
1584 | + | |
1585 | +			AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh2); // Get the info. we need | |
1586 | + | |
1587 | +			AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[1]; | |
1588 | +			AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1589 | +			break; | |
1590 | + | |
1591 | +		case AIX_PV_STATE_EITHER_VGDA: | |
1592 | +			LOG_DEBUG("UVG SWITCH EITHER VGDA %d size:%d\n", | |
1593 | +				  AIXVGLptr->CleanVGInfo, | |
1594 | +				  (int) sizeof (struct vg_header)); | |
1595 | +			if (COMPARE_UNIQUE_IDS(AIXvgh->vg_id, AIXvgh2->vg_id)) { | |
1596 | + | |
1597 | +				AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need | |
1598 | + | |
1599 | +				AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0]; | |
1600 | +				AIXVGLptr->vgda_len = AIXlvm->vgda_len; | |
1601 | +			} else { | |
1602 | +				AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1603 | +				// Not sure where this PV belongs. It thinks it is | |
1604 | +				// supposed to be in two different containers. We will | |
1605 | +				// probably need to put this on a separate, temporary | |
1606 | +				// list, and determine later which container is missing | |
1607 | +				// a PV. | |
1608 | +			} | |
1609 | +			break; | |
1610 | + | |
1611 | +		default: | |
1612 | +			LOG_ERROR("UVG Invalid PV state (%d) for %d\n", | |
1613 | +				  AIXVGLptr->CleanVGInfo, | |
1614 | +				  AIXVGLptr->vg_id.word2); | |
1615 | +			AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID; | |
1616 | +			break; | |
1617 | +		} | |
1618 | + | |
1619 | +	} | |
1620 | + | |
1621 | +//	add_VG_data_to_VG_list(logical_node, AIXVGLptr, AIXlvm->pv_num); | |
1622 | +	AIXVGLptr->flags |= AIX_VG_DIRTY; | |
1623 | + | |
1624 | +	AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2); | |
1625 | + | |
1626 | +	LOG_DEBUG("UVG Exiting CleanVGInfo:%d\n", AIXVGLptr->CleanVGInfo); | |
1627 | + | |
1628 | +	return 0; | |
1629 | +} | |
1630 | + | |
1631 | +/**************************************************** | |
1632 | +* Function: check_volume_groups | |
1633 | +* | |
1634 | +* Post-process every group flagged AIX_VG_DIRTY: a group whose header | |
1635 | +* reports no logical volumes is unlinked and deallocated, and a group | |
1636 | +* whose partition count differs from the header's numpvs is flagged | |
1637 | +* AIX_VG_INCOMPLETE (and kept, so we build what we can). Always returns 0. | |
1638 | +*****************************************************/ | |
1639 | +static int | |
1640 | +check_volume_groups(void) | |
1641 | +{ | |
1642 | +	struct aix_volume_group *vg; | |
1643 | +	struct aix_volume_group *following; | |
1644 | + | |
1645 | +	LOG_DEBUG("CHVG Checking volume groups:\n"); | |
1646 | + | |
1647 | +	for (vg = AIXVolumeGroupList; vg != NULL; vg = following) { | |
1648 | +		// Fetch the successor before vg can be removed and deallocated. | |
1649 | +		following = vg->next; | |
1650 | + | |
1651 | +		if (!(vg->flags & AIX_VG_DIRTY)) { | |
1652 | +			continue; | |
1653 | +		} | |
1654 | + | |
1655 | +		if (vg->AIXvgh->numlvs == 0) { | |
1656 | +			remove_group_from_list(vg); | |
1657 | +			deallocate_volume_group(vg); | |
1658 | +			continue; | |
1659 | +		} | |
1660 | +		if (vg->partition_count == vg->AIXvgh->numpvs) { | |
1661 | +			continue; | |
1662 | +		} | |
1663 | + | |
1664 | +		vg->flags |= AIX_VG_INCOMPLETE; | |
1665 | +		LOG_ERROR("CHVG Found incomplete VG !! flags:%x\n", vg->flags); | |
1666 | +		LOG_ERROR("CHVG Found %d PVs should have %d PVs\n", | |
1667 | +			  vg->partition_count, vg->AIXvgh->numpvs); | |
1668 | +	} | |
1669 | + | |
1670 | +	LOG_DEBUG("CHVG Finished Checking volume groups:\n"); | |
1671 | +	return 0; | |
1672 | +} | |
1673 | + | |
1674 | +/************************************************************************ | |
1675 | + * Function: discover_logical_volumes | |
1676 | + * | |
1677 | + * After all PVs have been claimed and added to the appropriate VG list, | |
1678 | + * the volumes for each VG must be constructed. | |
1679 | + * | |
1680 | + * | |
1681 | + */ | |
1682 | +static int | |
1683 | +discover_logical_volumes(void) | |
1684 | +{ | |
1685 | + | |
1686 | + struct aix_volume_group *AIXVGLPtr; | |
1687 | + struct aix_logical_volume *new_LV; | |
1688 | + struct partition_list_entry *partition; | |
1689 | + struct evms_logical_node *node; | |
1690 | + struct lv_entries *AIXlvent, *AIXlventHead; | |
1691 | + int j, lv_found, all_lvs_found, rc; | |
1692 | + struct namelist *AIXnamelist; | |
1693 | + char *NameBuffer; | |
1694 | + | |
1695 | + AIXlventHead = | |
1696 | + kmalloc(MAX_SECTORS_LV_ENTRIES * AIX_SECTOR_SIZE, GFP_KERNEL); | |
1697 | + if (!AIXlventHead) { | |
1698 | + return -ENOMEM; | |
1699 | + } | |
1700 | + | |
1701 | + memset(AIXlventHead, 0, (MAX_SECTORS_LV_ENTRIES * AIX_SECTOR_SIZE)); | |
1702 | + | |
1703 | + NameBuffer = | |
1704 | + kmalloc(MAX_SECTORS_NAMELIST * AIX_SECTOR_SIZE, GFP_KERNEL); | |
1705 | + if (!NameBuffer) { | |
1706 | + kfree(AIXlventHead); | |
1707 | + return -ENOMEM; | |
1708 | + } | |
1709 | + | |
1710 | + memset(NameBuffer, 0, (MAX_SECTORS_NAMELIST * AIX_SECTOR_SIZE)); | |
1711 | + | |
1712 | + for (AIXVGLPtr = AIXVolumeGroupList; AIXVGLPtr; | |
1713 | + AIXVGLPtr = AIXVGLPtr->next ) { | |
1714 | + | |
1715 | + partition = AIXVGLPtr->partition_list; | |
1716 | + | |
1717 | + if (!(AIXVGLPtr->flags & AIX_VG_DIRTY)) { | |
1718 | + continue; | |
1719 | + } | |
1720 | + | |
1721 | + if (partition == NULL) { | |
1722 | + continue; | |
1723 | + } | |
1724 | + | |
1725 | + node = partition->logical_node; | |
1726 | + | |
1727 | + if (node == NULL) { | |
1728 | + continue; | |
1729 | + } | |
1730 | + | |
1731 | + LOG_DEBUG("DLV INIT_IO AIXNameList position:%d\n", | |
1732 | + ((AIXVGLPtr->vgda_psn + AIXVGLPtr->vgda_len) - 1 - | |
1733 | + MAX_SECTORS_NAMELIST)); | |
1734 | + LOG_DEBUG("AIXVGLPTR:%p partition:%p node:%p \n", AIXVGLPtr, | |
1735 | + partition, node); | |
1736 | + | |
1737 | + if (INIT_IO(node, 0, | |
1738 | + ((AIXVGLPtr->vgda_psn + AIXVGLPtr->vgda_len) - 1 - | |
1739 | + MAX_SECTORS_NAMELIST), MAX_SECTORS_NAMELIST, | |
1740 | + NameBuffer)) { | |
1741 | + continue; | |
1742 | + } | |
1743 | + | |
1744 | + LOG_DEBUG("DLV INIT_IO AIXNameList\n"); | |
1745 | + | |
1746 | + if (INIT_IO(node, 0, AIXVGLPtr->vgda_psn + PSN_LVE_REC, | |
1747 | + MAX_SECTORS_LV_ENTRIES, AIXlventHead)) { | |
1748 | + continue; | |
1749 | + } | |
1750 | + AIXlvent = AIXlventHead; | |
1751 | + AIXnamelist = (struct namelist *) NameBuffer; | |
1752 | + | |
1753 | + LOG_DEBUG("DLV INIT_IO AIXlvent\n"); | |
1754 | + // Search through the LV structs for valid LV entries | |
1755 | + // We're just going to search until all valid LVs are found | |
1756 | + // The max. allowable LVs is 256 and we want don't want to | |
1757 | + // search for 255 if only 8 are defined 1-8 however, there | |
1758 | + // could be gaps in the LV numbering. i.e 1,2,3,4,5,6,7,8, 27,43, etc. | |
1759 | + | |
1760 | + for (j = 0, lv_found = 0, all_lvs_found = FALSE; | |
1761 | + !all_lvs_found && j < LVM_MAXLVS; j++, AIXlvent++) { | |
1762 | + | |
1763 | + LOG_DEBUG(" ** DVIG:lv_size:%d lvname:[%s] j:%d lv_number:%d ** \n", | |
1764 | + AIXlvent->num_lps, AIXnamelist->name[j], j, | |
1765 | + AIXlvent->lvname); | |
1766 | + LOG_DEBUG(" DVIG:stripe_exp:%u stripesize:%u lv_status:%d\n", | |
1767 | + AIXlvent->striping_width, | |
1768 | + GET_PHYSICAL_PART_SIZE(AIXlvent->stripe_exp), | |
1769 | + AIXlvent->lv_state); | |
1770 | + LOG_DEBUG(" DVIG Group:%x.Access:%x\n", | |
1771 | + (unsigned int) AIXVGLPtr->vg_id.word2, | |
1772 | + AIXlvent->permissions); | |
1773 | + LOG_DEBUG(" DVIG mirror:%d mirror_policy:%d mirwrt:%d \n", | |
1774 | + AIXlvent->mirror, AIXlvent->mirror_policy, | |
1775 | + AIXlvent->mirwrt_consist); | |
1776 | + | |
1777 | + // This is the same check we used in "diskedit" and "readdisk" | |
1778 | + if (AIXlvent->lv_state == 0 || | |
1779 | + AIXlvent->permissions > 0x10) { | |
1780 | + continue; | |
1781 | + } | |
1782 | + | |
1783 | + lv_found++; | |
1784 | + if (lv_found == AIXVGLPtr->AIXvgh->numlvs) { | |
1785 | + all_lvs_found = TRUE; | |
1786 | + } | |
1787 | + | |
1788 | + LOG_DEBUG(" DVIG lv_found:%d all_lvs_found:%d \n", | |
1789 | + lv_found, all_lvs_found); | |
1790 | + | |
1791 | + // Create a new logical volume and place it in the appropriate | |
1792 | + // spot in this VG's volume list. For re-discovery, make sure | |
1793 | + // this volume does not already exist. | |
1794 | + if (!AIXVGLPtr->volume_list[AIXlvent->lvname]) { | |
1795 | + new_LV = | |
1796 | + new_logical_volume(AIXlvent, | |
1797 | + AIXVGLPtr, | |
1798 | + AIXnamelist-> | |
1799 | + name[j], | |
1800 | + GET_PHYSICAL_PART_SIZE | |
1801 | + (AIXlvent-> | |
1802 | + stripe_exp)); | |
1803 | + if (!new_LV) { | |
1804 | + continue; | |
1805 | + } | |
1806 | + LOG_DEBUG(" DVIG Adding new logical volume %d to group:%x \n", | |
1807 | + new_LV->lv_number,AIXVGLPtr->vg_id.word2); | |
1808 | + | |
1809 | + AIXVGLPtr->volume_list[new_LV->lv_number] = new_LV; | |
1810 | + } else { | |
1811 | + LOG_DEBUG("DVIG Updating Vol Exists\n"); | |
1812 | + } | |
1813 | + } | |
1814 | + | |
1815 | + // Build the le_to_pe_map for each volume that was discovered above. | |
1816 | + // This has to be done after all volumes in the group are discovered | |
1817 | + if ((rc = build_pe_maps(AIXVGLPtr))) { | |
1818 | + continue; | |
1819 | + } | |
1820 | + | |
1821 | + check_log_volume_and_pe_maps(AIXVGLPtr); | |
1822 | + } | |
1823 | + | |
1824 | + kfree(NameBuffer); | |
1825 | + kfree(AIXlventHead); | |
1826 | + | |
1827 | + return 0; | |
1828 | +} | |
1829 | + | |
1830 | +/* | |
1831 | + * Function: new_logical_volume | |
1832 | + * | |
1833 | + * Allocate space for a new LVM logical volume, including space for the | |
1834 | + * PE map | |
1835 | + */ | |
1836 | +static struct aix_logical_volume * | |
1837 | +new_logical_volume(struct lv_entries *AIXlvent, | |
1838 | + struct aix_volume_group *volume_group, | |
1839 | + char *lv_name, u32 stripesize) | |
1840 | +{ | |
1841 | + | |
1842 | + struct aix_logical_volume *new_volume; | |
1843 | + const char *name = "evms_AIXiod"; | |
1844 | + const char *resync_name = "evms_AIXresync"; | |
1845 | + | |
1846 | + LOG_DEBUG(" NLV: lv_number:%d lv_allocated_le:%d lv_size:%d\n", | |
1847 | + AIXlvent->lvname, AIXlvent->num_lps, | |
1848 | + AIXlvent->num_lps * volume_group->pe_size); | |
1849 | + | |
1850 | + // Allocate space for the new logical volume. | |
1851 | + new_volume = kmalloc(sizeof (struct aix_logical_volume), GFP_KERNEL); | |
1852 | + if (!new_volume) { | |
1853 | + return NULL; | |
1854 | + } | |
1855 | + memset(new_volume, 0, sizeof (struct aix_logical_volume)); | |
1856 | + | |
1857 | + // Allocate space for the LE to PE mapping table | |
1858 | + // We add 1 for the allocated le to ease mapping later on, all AIX le are 1 based | |
1859 | + new_volume->le_to_pe_map = | |
1860 | + kmalloc((AIXlvent->num_lps + 1) * sizeof (struct pe_table_entry), | |
1861 | + GFP_KERNEL); | |
1862 | + if (!new_volume->le_to_pe_map) { | |
1863 | + delete_logical_volume(new_volume); | |
1864 | + return NULL; | |
1865 | + } | |
1866 | + | |
1867 | + memset(new_volume->le_to_pe_map, 0, | |
1868 | + (AIXlvent->num_lps + 1) * sizeof (struct pe_table_entry)); | |
1869 | + | |
1870 | + if (AIXlvent->mirror > AIX_DEFAULT_MIRRORING) { | |
1871 | + new_volume->le_to_pe_map_mir1 = | |
1872 | + kmalloc((AIXlvent->num_lps + | |
1873 | + 1) * sizeof (struct pe_table_entry), GFP_KERNEL); | |
1874 | + if (!new_volume->le_to_pe_map_mir1) { | |
1875 | + delete_logical_volume(new_volume); | |
1876 | + return NULL; | |
1877 | + } | |
1878 | + memset(new_volume->le_to_pe_map_mir1, 0, | |
1879 | + (AIXlvent->num_lps + | |
1880 | + 1) * sizeof (struct pe_table_entry)); | |
1881 | + } | |
1882 | + | |
1883 | + if (AIXlvent->mirror == AIX_MAX_MIRRORS) { | |
1884 | + new_volume->le_to_pe_map_mir2 = | |
1885 | + kmalloc((AIXlvent->num_lps + 1) | |
1886 | + * sizeof (struct pe_table_entry), GFP_KERNEL); | |
1887 | + if (!new_volume->le_to_pe_map_mir2) { | |
1888 | + delete_logical_volume(new_volume); | |
1889 | + return NULL; | |
1890 | + } | |
1891 | + memset(new_volume->le_to_pe_map_mir2, 0, | |
1892 | + (AIXlvent->num_lps +1) | |
1893 | + * sizeof (struct pe_table_entry)); | |
1894 | + } | |
1895 | + | |
1896 | + // Initialize the rest of the new volume. | |
1897 | + new_volume->lv_number = AIXlvent->lvname; | |
1898 | + new_volume->lv_size = AIXlvent->num_lps * (volume_group->pe_size); | |
1899 | + new_volume->lv_access = AIXlvent->permissions | EVMS_LV_NEW; // All volumes start new. | |
1900 | + new_volume->lv_status = AIXlvent->lv_state; | |
1901 | + //new_volume->lv_minor = MINOR(1); | |
1902 | + new_volume->mirror_copies = AIXlvent->mirror; | |
1903 | +// new_volume->mirror_iterations = AIX_DEFAULT_MIRRORING; | |
1904 | + new_volume->stripes = AIXlvent->striping_width; | |
1905 | + new_volume->stripe_size = stripesize; | |
1906 | + new_volume->stripe_size_shift = evms_cs_log2(stripesize); | |
1907 | + new_volume->pe_size = volume_group->pe_size; | |
1908 | + new_volume->pe_size_shift = evms_cs_log2(volume_group->pe_size); | |
1909 | + new_volume->num_le = AIXlvent->num_lps; | |
1910 | +// new_volume->new_volume = TRUE; | |
1911 | + new_volume->group = volume_group; | |
1912 | + | |
1913 | + volume_group->numlvs++; | |
1914 | + | |
1915 | + sprintf(new_volume->name, "aix/%s", lv_name); | |
1916 | + | |
1917 | + if (!AIX_BH_list_pool | |
1918 | + && new_volume->mirror_copies > AIX_DEFAULT_MIRRORING) { | |
1919 | + | |
1920 | + // We only need the ReSync thread if we have at least one mirrored LV. | |
1921 | + // You can't ReSync a non-mirrored drive | |
1922 | + | |
1923 | + AIX_BH_list_pool = | |
1924 | + evms_cs_create_pool(sizeof (struct aix_mirror_bh), | |
1925 | + "EVMS_AIX_BH", aix_notify_cache_ctor, | |
1926 | + NULL); | |
1927 | + if (!AIX_BH_list_pool) { | |
1928 | + return NULL; | |
1929 | + | |
1930 | + AIX_mirror_read_retry_thread = | |
1931 | + evms_cs_register_thread(AIXiod, NULL, name); | |
1932 | + | |
1933 | + AIX_mirror_resync_thread = | |
1934 | + evms_cs_register_thread(AIXresync, NULL, | |
1935 | + resync_name); | |
1936 | + } | |
1937 | + } | |
1938 | + | |
1939 | + LOG_DEBUG("NLV lv_number:%d name:%s lv_size " PFU64 " \n", | |
1940 | + new_volume->lv_number, new_volume->name, new_volume->lv_size); | |
1941 | + LOG_DEBUG("NLV stripe_size:%d stripe_size_shift:%d\n", | |
1942 | + new_volume->stripe_size, new_volume->stripe_size_shift); | |
1943 | + | |
1944 | + return new_volume; | |
1945 | +} | |
1946 | + | |
1947 | +/* | |
1948 | + * Function: aix_notify_cache_ctor | |
1949 | + * this function initializes the b_wait field in the buffer heads | |
1950 | + * in our private buffer head pool. | |
1951 | + */ | |
1952 | +static void | |
1953 | +aix_notify_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags) | |
1954 | +{ | |
1955 | + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == | |
1956 | + SLAB_CTOR_CONSTRUCTOR) { | |
1957 | + struct aix_mirror_bh *rbh = (struct aix_mirror_bh *) foo; | |
1958 | + memset(rbh, 0, sizeof (struct aix_mirror_bh)); | |
1959 | + rbh->remaining = (atomic_t) ATOMIC_INIT(0); | |
1960 | + init_waitqueue_head(&rbh->bh_req.b_wait); | |
1961 | + } | |
1962 | +} | |
1963 | + | |
1964 | +/* | |
1965 | + * Function: build_pe_maps | |
1966 | + * | |
1967 | + * After all logical volumes have been discovered, the mappings from | |
1968 | + * logical extents to physical extents must be constructed. Each PV | |
1969 | + * contains a map on-disk of its PEs. Each PE map entry contains the | |
1970 | + * logical volume number and the logical extent number on that volume. | |
1971 | + * Our internal map is the reverse of this map for each volume, listing | |
1972 | + * the PV node and sector offset for every logical extent on the volume. | |
1973 | + */ | |
1974 | +static int | |
1975 | + build_pe_maps(struct aix_volume_group *volume_group) | |
1976 | +{ | |
1977 | + struct partition_list_entry *partition; | |
1978 | + struct partition_list_entry *mirror_partition; | |
1979 | + struct pp_entries *AIXppent, *AIXppent_buff; | |
1980 | + struct pv_header *AIXpvh; | |
1981 | + u64 offset; | |
1982 | + u32 le_number; | |
1983 | + u32 j, pp_count, pvh_pos; | |
1984 | + u32 MirrorFound; | |
1985 | + u32 pvh_posn[LVM_MAXPVS]; | |
1986 | + u32 rc; | |
1987 | +#ifdef EVMS_DEBUG_MIRRORS | |
1988 | + u32 lv_found, all_lvs_found; | |
1989 | + u32 mirs = 0; | |
1990 | +#endif | |
1991 | + | |
1992 | + LOG_DEBUG(" *** BPEM ***\n"); | |
1993 | + // For every partition in this VG | |
1994 | + | |
1995 | + AIXppent_buff = kmalloc(AIX_SECTOR_SIZE * PHYS_VOL_OFFSET, GFP_KERNEL); | |
1996 | + if (!AIXppent_buff) { | |
1997 | + return -ENOMEM; | |
1998 | + } | |
1999 | + | |
2000 | + memset(AIXppent_buff, 0, AIX_SECTOR_SIZE * PHYS_VOL_OFFSET); | |
2001 | + memset(pvh_posn, 0, LVM_MAXPVS); | |
2002 | + | |
2003 | + AIXpvh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
2004 | + if (!AIXpvh) { | |
2005 | + kfree(AIXppent_buff); | |
2006 | + return -ENOMEM; | |
2007 | + } | |
2008 | + | |
2009 | + memset(AIXpvh, 0, AIX_SECTOR_SIZE); | |
2010 | + | |
2011 | + LOG_DEBUG(" BPEM AIXppent_buff:%d \n", | |
2012 | + (AIX_SECTOR_SIZE * PHYS_VOL_OFFSET)); | |
2013 | + | |
2014 | + // This next section is to calculate the sector spacing between PV info for the VG | |
2015 | + // AIX doesn't always space the info. the same. It could be 17 or 34 sectors apart | |
2016 | + // depending on the PE size selected. | |
2017 | + | |
2018 | + rc = AIX_pvh_data_posn(volume_group->vgda_psn, pvh_posn, volume_group->partition_list, volume_group->AIXvgh->numpvs); | |
2019 | + | |
2020 | + if (rc != 0) { | |
2021 | + kfree(AIXppent_buff); | |
2022 | + kfree(AIXpvh); | |
2023 | + return (rc); | |
2024 | + } | |
2025 | + | |
2026 | + for (partition = volume_group->partition_list; partition; | |
2027 | + partition = partition->next) { | |
2028 | + | |
2029 | + LOG_DEBUG(" BPEM partition:%p next:%p\n", partition, | |
2030 | + partition->next); | |
2031 | + | |
2032 | + pvh_pos = pvh_posn[partition->pv_number]; | |
2033 | + | |
2034 | + LOG_DEBUG(" BPEM pvh_pos:%d pv_number:%d\n", pvh_pos, partition->pv_number); | |
2035 | + | |
2036 | + if (INIT_IO(partition->logical_node, 0, pvh_pos, 1, AIXpvh)) { | |
2037 | + kfree(AIXppent_buff); | |
2038 | + kfree(AIXpvh); | |
2039 | + return -EIO; | |
2040 | + } | |
2041 | + // For every entry in the PE map, calculate the PE's sector offset | |
2042 | + // and update the correct LV's PE map. LV number of 0 marks an unused PE. | |
2043 | + // For re-discovery, only compute entries for new volumes. | |
2044 | + | |
2045 | + if (INIT_IO(partition->logical_node, 0, pvh_pos, AIX_PVHPP_LENGTH, | |
2046 | + AIXppent_buff)) { | |
2047 | + kfree(AIXppent_buff); | |
2048 | + kfree(AIXpvh); | |
2049 | + return -EIO; | |
2050 | + } | |
2051 | + | |
2052 | + AIXppent = AIXppent_buff; | |
2053 | + AIXppent++; | |
2054 | + | |
2055 | + pp_count = AIXpvh->pp_count; | |
2056 | + | |
2057 | + LOG_DEBUG("BPEM AIXpvh data: pp_count:%d psn_part1:%d pv_id1:%d pv_id2:%d pv_id3:%d pv_id4:%d pv_num:%d pv_state:%d vgdas:%d res1:%d res2:%d\n", AIXpvh->pp_count, | |
2058 | + AIXpvh->psn_part1, | |
2059 | + AIXpvh->pv_id.word1, | |
2060 | + AIXpvh->pv_id.word2, | |
2061 | + AIXpvh->pv_id.word3, | |
2062 | + AIXpvh->pv_id.word4, | |
2063 | + AIXpvh->pv_num, | |
2064 | + AIXpvh->pv_state, AIXpvh->pvnum_vgdas, AIXpvh->res1, AIXpvh->res2); | |
2065 | + | |
2066 | + LOG_DEBUG(" PE Map: volgrp:%x AIXpvh->pv_num:%d partition:%p next:%p lv_index:%d pp_count:%d\n", | |
2067 | + volume_group->vg_id.word2, AIXpvh->pv_num, partition, | |
2068 | + partition->next, AIXppent->lv_index, pp_count); | |
2069 | + | |
2070 | + for (j = 0; j < pp_count; j++,AIXppent++) { | |
2071 | + if (!AIXppent->lv_index || AIXppent->pp_state == AIX_LVM_LVUNDEF) { | |
2072 | + continue; | |
2073 | + } | |
2074 | + | |
2075 | + LOG_EXTRA(" -- pv:%x pp:%d st:%d nm:%s lv:%d lp:%d cp:%d fst v:%d fst p:%d snd v:%d snd p:%d \n", | |
2076 | + volume_group->vg_id.word2, j + 1, | |
2077 | + AIXppent->pp_state, | |
2078 | + volume_group->volume_list[AIXppent->lv_index -1]->name, | |
2079 | + AIXppent->lv_index, AIXppent->lp_num, | |
2080 | + AIXppent->copy, AIXppent->fst_alt_vol, | |
2081 | + AIXppent->fst_alt_part, | |
2082 | + AIXppent->snd_alt_vol, | |
2083 | + AIXppent->snd_alt_part); | |
2084 | + | |
2085 | + le_number = AIXppent->lp_num - 1; // AIX lp's start @ 1, we want a 0 index | |
2086 | + offset = ((j * (volume_group->pe_size)) + AIXpvh->psn_part1); | |
2087 | + | |
2088 | + LOG_DEBUG(" PE Map: le_number:%d partition:%p lv_index:%d lv_name:%s\n", | |
2089 | + le_number, partition, AIXppent->lv_index, | |
2090 | + volume_group->volume_list[AIXppent->lv_index -1]->name); | |
2091 | + | |
2092 | + if (!volume_group->volume_list[AIXppent->lv_index - 1]) { | |
2093 | + LOG_SERIOUS("Failed attempt to access volume without memory allocation lv:%d\n", | |
2094 | + AIXppent->lv_index - 1); | |
2095 | + continue; | |
2096 | + } | |
2097 | + | |
2098 | + if (volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map | |
2099 | + && le_number <= volume_group->volume_list[AIXppent->lv_index - 1]->num_le) { | |
2100 | + | |
2101 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].owning_pv = partition; | |
2102 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].pe_sector_offset = offset; | |
2103 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].pp_state = AIXppent->pp_state; | |
2104 | + } | |
2105 | + | |
2106 | + if (volume_group->volume_list[AIXppent->lv_index -1]->mirror_copies > | |
2107 | + AIX_DEFAULT_MIRRORING) { | |
2108 | + | |
2109 | + LOG_EXTRA(" PE Map: Mirror found lv:%d -- \n", | |
2110 | + AIXppent->lv_index); | |
2111 | + | |
2112 | + for (mirror_partition = volume_group->partition_list, | |
2113 | + MirrorFound = FALSE; | |
2114 | + mirror_partition && !MirrorFound; | |
2115 | + mirror_partition = mirror_partition->next) { | |
2116 | + | |
2117 | + if (mirror_partition->pv_number == AIXppent->fst_alt_vol) { | |
2118 | + | |
2119 | + offset = (((AIXppent->fst_alt_part - 1) * (volume_group->pe_size)) + AIXpvh->psn_part1); | |
2120 | + | |
2121 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].owning_pv = mirror_partition; | |
2122 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].pe_sector_offset = offset; | |
2123 | + volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].pp_state = AIXppent->pp_state; | |
2124 | + | |
2125 | + LOG_EXTRA(" PE Map: mirror_partition:%p \n", | |
2126 | + mirror_partition); | |
2127 | + LOG_EXTRA(" PE Map: mirror_sector_offet:%d\n", | |
2128 | + AIXppent->fst_alt_part); | |
2129 | + | |
2130 | + MirrorFound = TRUE; | |
2131 | + } | |
2132 | + } | |
2133 | + | |
2134 | + if (volume_group->volume_list[AIXppent->lv_index -1]->mirror_copies == AIX_MAX_MIRRORS) { | |
2135 | + | |
2136 | + for (mirror_partition = volume_group->partition_list, | |
2137 | + MirrorFound = FALSE; | |
2138 | + mirror_partition && !MirrorFound; | |
2139 | + mirror_partition = mirror_partition->next) { | |
2140 | + | |
2141 | + if (mirror_partition->pv_number == AIXppent->snd_alt_vol) { | |
2142 | + | |
2143 | + offset = (((AIXppent->snd_alt_part - 1) * (volume_group->pe_size)) + AIXpvh->psn_part1); | |
2144 | + | |
2145 | + volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].owning_pv = mirror_partition; | |
2146 | + volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].pe_sector_offset = offset; | |
2147 | + volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].pp_state = AIXppent->pp_state; | |
2148 | + | |
2149 | + LOG_EXTRA(" PE Map: mirror_partition2:%p \n", | |
2150 | + mirror_partition); | |
2151 | + LOG_EXTRA(" PE Map: mirror_sector_offet2:%d\n", | |
2152 | + AIXppent->snd_alt_part); | |
2153 | + | |
2154 | + MirrorFound = TRUE; | |
2155 | + } | |
2156 | + } | |
2157 | + } | |
2158 | + | |
2159 | + } // End of if mirroring is enabled | |
2160 | + } | |
2161 | + } | |
2162 | + | |
2163 | +// LOG_EXTRA(" PE Map: PE maps:%d Mirror count:%d -- \n", lvs, mirs); | |
2164 | + | |
2165 | +#ifdef EVMS_DEBUG_MIRRORS | |
2166 | + for (mirs = 0, lv_found = 0, all_lvs_found = FALSE; | |
2167 | + !all_lvs_found && mirs < LVM_MAXLVS; mirs++) { | |
2168 | + | |
2169 | + if (volume_group->volume_list[mirs] != NULL) { | |
2170 | + if (volume_group->volume_list[mirs]->lv_status == | |
2171 | + LV_ACTIVE) { | |
2172 | + | |
2173 | + lv_found++; | |
2174 | + | |
2175 | + LOG_DEBUG(" PE Map: owning part lv %d -- %p\n", | |
2176 | + mirs, | |
2177 | + volume_group->volume_list[mirs]-> | |
2178 | + le_to_pe_map[0].owning_pv); | |
2179 | + if (volume_group->volume_list[mirs]-> | |
2180 | + mirror_copies > AIX_DEFAULT_MIRRORING) { | |
2181 | + LOG_DEBUG(" PE Map: mirror_partition lv %d -- %p \n", | |
2182 | + mirs, | |
2183 | + volume_group->volume_list[mirs]-> | |
2184 | + le_to_pe_map_mir1[0].owning_pv); | |
2185 | + } | |
2186 | + if (volume_group->volume_list[mirs]-> | |
2187 | + mirror_copies == AIX_MAX_MIRRORS) { | |
2188 | + LOG_DEBUG(" PE Map: mirror_partition lv %d -- %p \n", | |
2189 | + mirs, | |
2190 | + volume_group->volume_list[mirs]-> | |
2191 | + le_to_pe_map_mir2[0].owning_pv); | |
2192 | + } | |
2193 | + } | |
2194 | + if (lv_found == volume_group->AIXvgh->numlvs) { | |
2195 | + all_lvs_found = TRUE; | |
2196 | + LOG_DEBUG(" PE Map: all_lvs_found\n"); | |
2197 | + } | |
2198 | + } | |
2199 | + } | |
2200 | +#endif | |
2201 | + | |
2202 | + kfree(AIXpvh); | |
2203 | + kfree(AIXppent_buff); | |
2204 | + | |
2205 | + return 0; | |
2206 | +} | |
2207 | + | |
2208 | +/* | |
2209 | + * Function: check_log_volume_and_pe_maps | |
2210 | + * | |
2211 | + * Make sure all volumes in this group have valid LE-to-PE maps. | |
2212 | + * Any volume that doesn't is deleted. This is safe for re-discovery | |
2213 | + * because only new volumes could have corrupted PE maps. | |
2214 | + */ | |
2215 | +static int | |
2216 | +check_log_volume_and_pe_maps(struct aix_volume_group *group) | |
2217 | +{ | |
2218 | + struct aix_logical_volume *volume; | |
2219 | + int i, j, lv_found, all_lvs_found; | |
2220 | + | |
2221 | + LOG_DEBUG(" check_pe_map.\n"); | |
2222 | + | |
2223 | + for (i = 0, all_lvs_found = FALSE, lv_found = 0; | |
2224 | + !all_lvs_found && i < LVM_MAXLVS; i++) { | |
2225 | + if (!group->volume_list[i]) { | |
2226 | + LOG_DEBUG(" CPEM No Volume %d found \n", i); | |
2227 | + continue; | |
2228 | + } | |
2229 | + | |
2230 | + volume = group->volume_list[i]; | |
2231 | + if (!volume->le_to_pe_map) { | |
2232 | + LOG_DEBUG(" CPEM Volume %s has no PE map.\n", | |
2233 | + volume->name); | |
2234 | + delete_logical_volume(volume); | |
2235 | + continue; | |
2236 | + } | |
2237 | + | |
2238 | + LOG_DEBUG(" CPEM volume %s num_le: %d \n", volume->name, | |
2239 | + volume->num_le); | |
2240 | + | |
2241 | + lv_found++; | |
2242 | + | |
2243 | + if (lv_found == group->AIXvgh->numlvs) { | |
2244 | + all_lvs_found = TRUE; | |
2245 | + } | |
2246 | + | |
2247 | + for (j = 0; j < volume->num_le; j++) { | |
2248 | + if (!volume->le_to_pe_map[j].owning_pv || | |
2249 | + !volume->le_to_pe_map[j].pe_sector_offset) { | |
2250 | + LOG_SERIOUS(" CPEM Volume (%s) incomplete PE map (LE %d) \n", | |
2251 | + volume->name, j); | |
2252 | + volume->lv_access |= EVMS_LV_INCOMPLETE; | |
2253 | + } | |
2254 | + | |
2255 | + if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) { | |
2256 | + if (!volume->le_to_pe_map_mir1[j].owning_pv || | |
2257 | + !volume->le_to_pe_map_mir1[j]. | |
2258 | + pe_sector_offset) { | |
2259 | + LOG_SERIOUS(" CPEM Volume (%s) incomplete PE mirror map 1 (LE %d) \n", | |
2260 | + volume->name, j); | |
2261 | + volume->lv_access |= EVMS_LV_INCOMPLETE; | |
2262 | + } | |
2263 | + | |
2264 | + if (volume->mirror_copies == AIX_MAX_MIRRORS) { | |
2265 | + if (!volume->le_to_pe_map_mir2[j]. | |
2266 | + owning_pv | |
2267 | + || !volume->le_to_pe_map_mir2[j]. | |
2268 | + pe_sector_offset) { | |
2269 | + LOG_SERIOUS(" CPEM Volume (%s) incomplete PE mirror map 2 (LE %d) \n", | |
2270 | + volume->name, j); | |
2271 | + volume->lv_access |= EVMS_LV_INCOMPLETE; | |
2272 | + } | |
2273 | + } | |
2274 | + } | |
2275 | + } | |
2276 | + } | |
2277 | + | |
2278 | + LOG_EXTRA(" Leaving check_pe_map.\n"); | |
2279 | + return 0; | |
2280 | +} | |
2281 | + | |
2282 | +/* | |
2283 | + * Function: export_volumes | |
2284 | + * | |
2285 | + * The last thing this VGE must do is take each constructed volume and | |
2286 | + * place it back on the evms logical partition list. | |
2287 | + */ | |
2288 | +static int | |
2289 | +export_volumes(struct evms_logical_node **evms_partition_list) | |
2290 | +{ | |
2291 | + struct aix_volume_group *AIXVGLPtr; | |
2292 | + struct evms_logical_node *new_node; | |
2293 | + struct aix_logical_volume *volume; | |
2294 | + int j, lv_found, all_lvs_found; | |
2295 | + int count = 0; | |
2296 | + | |
2297 | + for (AIXVGLPtr = AIXVolumeGroupList; AIXVGLPtr; AIXVGLPtr = AIXVGLPtr->next) { | |
2298 | + | |
2299 | + if (!(AIXVGLPtr->flags & AIX_VG_DIRTY)) { | |
2300 | + LOG_DEBUG(" EV Existing group(%d), not dirty, skipping\n", | |
2301 | + AIXVGLPtr->vg_id.word2); | |
2302 | + continue; | |
2303 | + } | |
2304 | + LOG_DEBUG(" Exporting all new volumes numpvs:%d numlvs:%d \n", | |
2305 | + AIXVGLPtr->AIXvgh->numpvs, AIXVGLPtr->numlvs); | |
2306 | + | |
2307 | + // Export every valid volume in the group. For re-discovery, | |
2308 | + // make sure we are only exporting "new" volumes. | |
2309 | + | |
2310 | + for (j = 0, all_lvs_found = FALSE, lv_found = 0; | |
2311 | + !all_lvs_found && j < LVM_MAXLVS; j++) { | |
2312 | + if (AIXVGLPtr->volume_list[j] != NULL) { | |
2313 | + if (AIXVGLPtr->volume_list[j]->lv_access & EVMS_LV_NEW) { | |
2314 | + | |
2315 | + LOG_DEBUG(" EV Checking LV:[%d] volume:%p\n", | |
2316 | + j,AIXVGLPtr->volume_list[j]); | |
2317 | + | |
2318 | + volume = AIXVGLPtr->volume_list[j]; | |
2319 | + lv_found++; | |
2320 | + | |
2321 | + if (lv_found == AIXVGLPtr->AIXvgh->numlvs) { | |
2322 | + all_lvs_found = TRUE; | |
2323 | + } | |
2324 | + // For new volumes, create a new EVMS node and | |
2325 | + // initialize the appropriate fields. | |
2326 | + if (evms_cs_allocate_logical_node(&new_node)) { | |
2327 | + LOG_DEBUG(" Export Vol Error allocating node !!\n"); | |
2328 | + continue; | |
2329 | + } else { | |
2330 | + LOG_DEBUG(" EV Node allocated OK\n"); | |
2331 | + } | |
2332 | + | |
2333 | +// volume->new_volume = 0; | |
2334 | + volume->volume_node = new_node; | |
2335 | + volume->lv_access &= (~EVMS_LV_NEW); | |
2336 | + new_node->hardsector_size = AIXVGLPtr->hard_sect_size; | |
2337 | + new_node->block_size = AIXVGLPtr->block_size; | |
2338 | + new_node->plugin = &plugin_header; | |
2339 | + new_node->private = volume; | |
2340 | + new_node->total_vsectors = volume->lv_size; | |
2341 | + | |
2342 | + LOG_DEBUG(" EV volume->name:[%s]\n", | |
2343 | + volume->name); | |
2344 | + | |
2345 | + strncpy(new_node->name,volume->name, | |
2346 | + EVMS_VOLUME_NAME_SIZE + 1); | |
2347 | + | |
2348 | + // Is the volume read-only? | |
2349 | + if (!(volume->lv_access & AIX_LV_WRITE) | |
2350 | + || volume->lv_access & EVMS_LV_INCOMPLETE) | |
2351 | + { | |
2352 | + new_node->flags |= EVMS_VOLUME_SET_READ_ONLY; | |
2353 | + LOG_DEBUG(" EV Read Only volume->lv_access:%d\n", | |
2354 | + volume->lv_access); | |
2355 | + } | |
2356 | + | |
2357 | + evms_cs_add_logical_node_to_list(evms_partition_list, | |
2358 | + new_node); | |
2359 | + count++; | |
2360 | + | |
2361 | + LOG_DEBUG(" Exporting LVM volume %p new_node:%p ESD->volume_name[%s]\n", | |
2362 | + volume, new_node,new_node->name); | |
2363 | + } else { | |
2364 | + evms_cs_add_logical_node_to_list(evms_partition_list, | |
2365 | + AIXVGLPtr->volume_list[j]->volume_node); | |
2366 | + count++; | |
2367 | + LOG_DEBUG(" ELV vol_list[%d]%p\n", j, | |
2368 | + AIXVGLPtr->volume_list[j]); | |
2369 | + } | |
2370 | + } else { | |
2371 | + LOG_DEBUG(" EV Checking LV:[%d] == NULL\n",j); | |
2372 | + } | |
2373 | + } // end checking all lvs | |
2374 | + | |
2375 | + AIXVGLPtr->flags &= ~AIX_VG_DIRTY; | |
2376 | + } | |
2377 | + | |
2378 | + return count; | |
2379 | +} | |
2380 | + | |
2381 | +/* | |
2382 | + * Function: delete_logical_volume | |
2383 | + * | |
2384 | + * This function deletes the in-memory representation of a single LVM | |
2385 | + * logical volume, including its PE map and any snapshot data. It does | |
2386 | + * not alter the parent volume group, except to remove this volume from | |
2387 | + * its volume list. | |
2388 | + */ | |
2389 | +static int | |
2390 | +delete_logical_volume(struct aix_logical_volume *volume) | |
2391 | +{ | |
2392 | + struct aix_volume_group *group = volume->group; | |
2393 | + | |
2394 | + LOG_DEBUG(" Deleting volume %s\n", volume->name); | |
2395 | + | |
2396 | + // Now free up all the memory. This includes the LE-to-PE map, any | |
2397 | + // mirror PEs, etc. | |
2398 | + if (volume->le_to_pe_map) { | |
2399 | + kfree(volume->le_to_pe_map); | |
2400 | + volume->le_to_pe_map = NULL; | |
2401 | + } | |
2402 | + | |
2403 | + if (volume->le_to_pe_map_mir1) { | |
2404 | + kfree(volume->le_to_pe_map_mir1); | |
2405 | + volume->le_to_pe_map_mir1 = NULL; | |
2406 | + } | |
2407 | + | |
2408 | + if (volume->le_to_pe_map_mir2) { | |
2409 | + kfree(volume->le_to_pe_map_mir2); | |
2410 | + volume->le_to_pe_map_mir2 = NULL; | |
2411 | + } | |
2412 | + // Remove this volume from the volume-group's list. | |
2413 | + if (group && group->volume_list[volume->lv_number] == volume) { | |
2414 | + group->volume_list[volume->lv_number] = NULL; | |
2415 | + group->numlvs--; | |
2416 | + } | |
2417 | + | |
2418 | + kfree(volume); | |
2419 | + | |
2420 | + return 0; | |
2421 | +} | |
2422 | + | |
2423 | +/* Function: remove_group_from_list | |
2424 | + * | |
2425 | + * Remove an LVM volume group from the global LVM list. | |
2426 | + */ | |
2427 | +static int | |
2428 | +remove_group_from_list(struct aix_volume_group *group) | |
2429 | +{ | |
2430 | + struct aix_volume_group **p_group; | |
2431 | + | |
2432 | + for (p_group = &AIXVolumeGroupList; *p_group; | |
2433 | + p_group = &(*p_group)->next) { | |
2434 | + if (*p_group == group) { | |
2435 | + *p_group = (*p_group)->next; | |
2436 | + group->next = NULL; | |
2437 | + break; | |
2438 | + } | |
2439 | + } | |
2440 | + return 0; | |
2441 | +} | |
2442 | + | |
2443 | +/* | |
2444 | + * Function: delete_aix_node | |
2445 | + * | |
2446 | + * Tear down the logical volume behind an exported EVMS node. The | |
2447 | + * volume is taken from logical_node->private and released with | |
2448 | + * delete_logical_volume(). When that leaves the owning group with | |
2449 | + * no volumes, the group is unlinked from the global list and | |
2450 | + * deallocated too. The node itself is released last. | |
2451 | + * | |
2452 | + * Returns 0 on success, or -EINVAL if deleting the volume fails (the | |
2453 | + * group and the node are left untouched in that case). | |
2454 | + */ | |
2455 | +static int | |
2456 | +delete_aix_node(struct evms_logical_node *logical_node) | |
2457 | +{ | |
2458 | +	struct aix_logical_volume *lv = | |
2459 | +	    (struct aix_logical_volume *) logical_node->private; | |
2460 | +	struct aix_volume_group *vg = lv->group; | |
2461 | + | |
2462 | +	if (delete_logical_volume(lv) != 0) | |
2463 | +		return -EINVAL; | |
2464 | + | |
2465 | +	// vg was saved up front: lv has been freed by this point. | |
2466 | +	if (vg != NULL && vg->numlvs == 0) { | |
2467 | +		remove_group_from_list(vg); | |
2468 | +		deallocate_volume_group(vg); | |
2469 | +	} | |
2470 | + | |
2471 | +	evms_cs_deallocate_logical_node(logical_node); | |
2472 | +	return 0; | |
2473 | +} | |
2474 | + | |
2475 | +/* Function: deallocate_volume_group | |
2476 | + * | |
2477 | + * Free the in-memory representation of a volume group: every entry on | |
2478 | + * its partition list (sending a DELETE down to each entry that still | |
2479 | + * has a logical node), every logical volume still recorded in | |
2480 | + * volume_list, and finally the group structure itself. | |
2481 | + * | |
2482 | + * This function does not unlink the group from AIXVolumeGroupList. | |
2483 | + * Always returns 0. | |
2484 | + */ | |
2485 | +static int | |
2486 | +deallocate_volume_group(struct aix_volume_group *group) | |
2487 | +{ | |
2488 | +	struct partition_list_entry *entry = group->partition_list; | |
2489 | +	int lv; | |
2490 | + | |
2491 | +	LOG_DEBUG(" Deleting volume group %x\n", group->vg_id.word2); | |
2492 | + | |
2493 | +	while (entry) { | |
2494 | +		// Fetch the successor first; the entry is freed below. | |
2495 | +		struct partition_list_entry *following = entry->next; | |
2496 | + | |
2497 | +		if (entry->logical_node) { | |
2498 | +			LOG_DEBUG(" Deleting PV %d from group %x\n", | |
2499 | +				  entry->pv_number, group->vg_id.word2); | |
2500 | +			DELETE(entry->logical_node); | |
2501 | +		} | |
2502 | +		kfree(entry); | |
2503 | +		entry = following; | |
2504 | +	} | |
2505 | + | |
2506 | +	// Release whatever logical volumes remain in the group. | |
2507 | +	for (lv = 0; lv < LVM_MAXLVS; lv++) { | |
2508 | +		if (!group->volume_list[lv]) | |
2509 | +			continue; | |
2510 | +		delete_logical_volume(group->volume_list[lv]); | |
2511 | +	} | |
2512 | + | |
2513 | +	kfree(group); | |
2514 | +	return 0; | |
2515 | +} | |
2516 | + | |
2517 | +/* Function: end_discover_aix | |
2518 | + * | |
2519 | + * Discovery at the region-manager level is iterative, much like the | |
2520 | + * EVMS feature level. To support that, and to support partial volume | |
2521 | + * discovery, EVMS calls this second entry point to tell the region | |
2522 | + * manager that discovery is over and that anything still incomplete | |
2523 | + * should be finished now. Nothing new has been added to the node list | |
2524 | + * since the last call of the regular discover function, so no attempt | |
2525 | + * is made to discover additional volume groups. Logical volumes are | |
2526 | + * looked for once more, which makes it possible to export (read-only) | |
2527 | + * volumes whose LE maps are partially corrupted because PVs are | |
2528 | + * missing from their VG. | |
2529 | + * | |
2530 | + * The module use count is held for the duration of the call. Returns | |
2531 | + * the result of discover_logical_volumes() if it fails, otherwise the | |
2532 | + * result of export_volumes(). | |
2533 | + */ | |
2534 | +static int | |
2535 | +end_discover_aix(struct evms_logical_node **evms_logical_disk_head) | |
2536 | +{ | |
2537 | +	int status; | |
2538 | + | |
2539 | +	MOD_INC_USE_COUNT; | |
2540 | +	LOG_DEBUG("Final Discovery:\n"); | |
2541 | + | |
2542 | +	status = discover_logical_volumes(); | |
2543 | +	if (status == 0) { | |
2544 | +		// Export first, then run the post-discovery cleanup. | |
2545 | +		status = export_volumes(evms_logical_disk_head); | |
2546 | +		lvm_cleanup(); | |
2547 | +	} | |
2548 | + | |
2549 | +	MOD_DEC_USE_COUNT; | |
2550 | +	return status; | |
2551 | +} | |
2552 | + | |
2553 | +/**************************************************** | |
2554 | +* Function: AIX_alloc_wbh | |
2555 | +* | |
2556 | +* Build the bookkeeping for one mirrored write: a head entry tracking | |
2557 | +* the caller's "bh", plus one entry per copy (AIX_DEFAULT_MIRRORING | |
2558 | +* through mirror_copies). Each copy holds a clone of "bh" aimed at its | |
2559 | +* own node and sector and completes via AIX_handle_write_mirror_drives | |
2560 | +* with the head as b_private. head->remaining counts the copies. | |
2561 | +* The "le" argument is currently unused. | |
2562 | +* | |
2563 | +* Returns the head, or NULL if the pool is exhausted; entries already | |
2564 | +* taken are handed back to the pool in that case. | |
2565 | +*****************************************************/ | |
2566 | +static struct aix_mirror_bh * | |
2567 | +AIX_alloc_wbh(struct evms_logical_node *node, | |
2568 | +	      struct evms_logical_node *node2, | |
2569 | +	      struct evms_logical_node *node3, | |
2570 | +	      struct buffer_head *bh, | |
2571 | +	      u32 mirror_copies, u32 le, u64 new_sector2, u64 new_sector3) | |
2572 | +{ | |
2573 | +	struct aix_mirror_bh *tmp_bh = NULL, *head_bh = NULL; | |
2574 | +	int i; | |
2575 | + | |
2576 | +	head_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE); | |
2577 | +	if (!head_bh) { | |
2578 | +		LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", | |
2579 | +			    __LINE__); | |
2580 | +		return NULL; | |
2581 | +	} | |
2582 | + | |
2583 | +	head_bh->master_bh = bh; | |
2584 | +	head_bh->mirror_bh_list = NULL; | |
2585 | +	head_bh->remaining = (atomic_t) ATOMIC_INIT(0); | |
2586 | + | |
2587 | +	for (i = AIX_DEFAULT_MIRRORING; i <= mirror_copies; i++) { | |
2588 | +		tmp_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, | |
2589 | +						    EVMS_BLOCKABLE); | |
2590 | +		if (!tmp_bh) { | |
2591 | +			LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", | |
2592 | +				    __LINE__); | |
2593 | +			// Hand back the partial chain instead of leaking it. | |
2594 | +			while ((tmp_bh = head_bh->mirror_bh_list) != NULL) { | |
2595 | +				head_bh->mirror_bh_list = tmp_bh->next_r1; | |
2596 | +				evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh); | |
2597 | +			} | |
2598 | +			evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh); | |
2599 | +			return NULL; | |
2600 | +		} | |
2601 | + | |
2602 | +		tmp_bh->next_r1 = head_bh->mirror_bh_list; | |
2603 | +		head_bh->mirror_bh_list = tmp_bh; | |
2604 | +		atomic_inc(&head_bh->remaining); | |
2605 | +		memcpy(&tmp_bh->bh_req, bh, sizeof (struct buffer_head)); | |
2606 | +		tmp_bh->remaining = (atomic_t) ATOMIC_INIT(0); | |
2607 | +		init_waitqueue_head(&tmp_bh->bh_req.b_wait); | |
2608 | + | |
2609 | +		switch (i) { | |
2610 | +		case AIX_DEFAULT_MIRRORING: | |
2611 | +			tmp_bh->node = node; | |
2612 | +			tmp_bh->bh_req.b_rsector = bh->b_rsector; | |
2613 | +			break; | |
2614 | +		case AIX_FIRST_MIRROR: | |
2615 | +			tmp_bh->node = node2; | |
2616 | +			tmp_bh->bh_req.b_rsector = new_sector2; | |
2617 | +			break; | |
2618 | +		case AIX_MAX_MIRRORS: | |
2619 | +			tmp_bh->node = node3; | |
2620 | +			tmp_bh->bh_req.b_rsector = new_sector3; | |
2621 | +			break; | |
2622 | +		} | |
2623 | +		tmp_bh->bh_req.b_end_io = AIX_handle_write_mirror_drives; | |
2624 | +		tmp_bh->bh_req.b_private = (void *) head_bh; | |
2625 | +	} | |
2626 | +	return head_bh; | |
2627 | +} | |
2628 | + | |
2629 | +/**************************************************** | |
2630 | +* Function: AIX_handle_write_mirror_drives | |
2631 | +* | |
2632 | +* b_end_io callback for the per-copy buffer heads of a mirrored write. | |
2633 | +* A failed copy has its physical partition flagged stale. When the | |
2634 | +* tracking entry's counter reaches zero the master bh is completed, | |
2635 | +* all tracking entries go back to the pool and the request count drops. | |
2636 | +*****************************************************/ | |
2637 | +static void | |
2638 | +AIX_handle_write_mirror_drives(struct buffer_head *bh, int uptodate) | |
2639 | +{ | |
2640 | +	struct aix_logical_volume *volume; | |
2641 | +	struct evms_logical_node *node; | |
2642 | +	struct aix_mirror_bh *tmp_bh = NULL, *tmp_bh2 = NULL; | |
2643 | +	kdev_t tmp_b_rdev; | |
2644 | +	u32 count, le = 0; | |
2645 | + | |
2646 | +	tmp_bh = (struct aix_mirror_bh *) bh->b_private; | |
2647 | +	// Saved now: tmp_bh is released before the device is needed below. | |
2648 | +	tmp_b_rdev = tmp_bh->master_bh->b_rdev; | |
2649 | +	node = tmp_bh->node; | |
2650 | +	volume = (struct aix_logical_volume *) node->private; | |
2651 | + | |
2652 | +	LOG_DEBUG("AHWMD node:%p bh_flags:%lu uptodate:%d mirror_copies:%d \n", | |
2653 | +		  node, bh->b_state, uptodate, volume->mirror_copies); | |
2654 | + | |
2655 | +	if (!uptodate) { | |
2656 | +		le = tmp_bh->le; | |
2657 | + | |
2658 | +		// AIX_LVM_LVSTALE is tested as a bit mask elsewhere in this | |
2659 | +		// file, so set it with OR. Adding it would corrupt pp_state | |
2660 | +		// the second time the same partition failed. | |
2661 | +		switch (tmp_bh->iteration) { | |
2662 | +		case AIX_DEFAULT_MIRRORING: | |
2663 | +			volume->le_to_pe_map[le].pp_state |= AIX_LVM_LVSTALE; | |
2664 | +			break; | |
2665 | + | |
2666 | +		case AIX_FIRST_MIRROR: | |
2667 | +			volume->le_to_pe_map_mir1[le].pp_state |= AIX_LVM_LVSTALE; | |
2668 | +			break; | |
2669 | + | |
2670 | +		case AIX_MAX_MIRRORS: | |
2671 | +			volume->le_to_pe_map_mir2[le].pp_state |= AIX_LVM_LVSTALE; | |
2672 | +			break; | |
2673 | +		} | |
2674 | + | |
2675 | +		AIX_evms_cs_notify_lv_io_error(node); | |
2676 | +	} | |
2677 | + | |
2678 | +	if (atomic_dec_and_test(&tmp_bh->remaining)) { | |
2679 | +		tmp_bh->master_bh->b_end_io(tmp_bh->master_bh, uptodate); | |
2680 | +		tmp_bh2 = tmp_bh->mirror_bh_list; | |
2681 | +		evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh); | |
2682 | + | |
2683 | +		while (tmp_bh2) { | |
2684 | +			tmp_bh = tmp_bh2->next_r1; | |
2685 | +			evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh2); | |
2686 | +			tmp_bh2 = tmp_bh; | |
2687 | +		} | |
2688 | + | |
2689 | +		evms_cs_volume_request_in_progress(tmp_b_rdev, | |
2690 | +						   AIX_DECREMENT_REQUEST, | |
2691 | +						   &count); | |
2692 | +	} | |
2693 | +} | |
2694 | + | |
2695 | +/**************************************************** | |
2696 | +* Function: AIX_alloc_rbh | |
2697 | +* | |
2698 | +* Take one tracking entry from AIX_BH_list_pool for a read-type request. | |
2699 | +* It holds a clone of "bh" (the master) that completes through | |
2700 | +* AIX_handle_read_mirror_drives for AIX_LV_READ, otherwise through | |
2701 | +* AIX_sync_mirrored_partitions. Returns NULL if the pool is exhausted. | |
2702 | +*****************************************************/ | |
2703 | +static struct aix_mirror_bh * | |
2704 | +AIX_alloc_rbh(struct evms_logical_node *node, | |
2705 | +	      struct buffer_head *bh, | |
2706 | +	      u32 mirror_copies, u32 le, u64 org_sector, int cmd) | |
2707 | +{ | |
2708 | +	struct aix_mirror_bh *entry; | |
2709 | + | |
2710 | +	entry = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE); | |
2711 | +	if (entry == NULL) { | |
2712 | +		LOG_SERIOUS | |
2713 | +		    ("Unable to allocate memory for mirror pool line:%d\n", | |
2714 | +		     __LINE__); | |
2715 | +		return NULL; | |
2716 | +	} | |
2717 | + | |
2718 | +	// Bookkeeping kept alongside the cloned request. The | |
2719 | +	// mirror_copies and org_sector arguments are accepted | |
2720 | +	// but not recorded. | |
2721 | +	entry->master_bh = bh; | |
2722 | +	entry->node = node; | |
2723 | +	entry->le = le; | |
2724 | +	entry->cmd = cmd; | |
2725 | +	entry->iteration = AIX_FIRST_MIRROR; | |
2726 | +	entry->next_r1 = NULL; | |
2727 | +	entry->remaining = (atomic_t) ATOMIC_INIT(0); | |
2728 | + | |
2729 | +	// The clone starts as a byte copy of the caller's bh. | |
2730 | +	// Only the completion hook and its private pointer | |
2731 | +	// are redirected afterwards. | |
2732 | +	memcpy(&entry->bh_req, bh, sizeof (struct buffer_head)); | |
2733 | +	entry->bh_req.b_private = (void *) entry; | |
2734 | + | |
2735 | +	if (cmd == AIX_LV_READ) { | |
2736 | +		entry->bh_req.b_end_io = AIX_handle_read_mirror_drives; | |
2737 | +	} else { | |
2738 | +		entry->bh_req.b_end_io = AIX_sync_mirrored_partitions; | |
2739 | +	} | |
2740 | + | |
2741 | +	return entry; | |
2742 | +} | |
2743 | + | |
2744 | +/**************************************************** | |
2745 | +* Function: AIX_reschedule_retry | |
2746 | +* | |
2747 | +* Append aix_bh to the tail of the retry list while holding | |
2748 | +* AIX_retry_list_lock, then wake the mirror read retry thread. | |
2749 | +*****************************************************/ | |
2750 | +static void | |
2751 | +AIX_reschedule_retry(struct aix_mirror_bh *aix_bh) | |
2752 | +{ | |
2753 | +	unsigned long irq_state; | |
2754 | + | |
2755 | +	spin_lock_irqsave(&AIX_retry_list_lock, irq_state); | |
2756 | +	// An empty list means the tail pointer must restart at the head. | |
2757 | +	if (!AIX_retry_list) | |
2758 | +		AIX_retry_tail = &AIX_retry_list; | |
2759 | +	*AIX_retry_tail = aix_bh; | |
2760 | +	AIX_retry_tail = &aix_bh->next_r1; | |
2761 | +	aix_bh->next_r1 = NULL; | |
2762 | +	spin_unlock_irqrestore(&AIX_retry_list_lock, irq_state); | |
2763 | +	evms_cs_wakeup_thread(AIX_mirror_read_retry_thread); | |
2764 | +} | |
2765 | + | |
2766 | +/**************************************************** | |
2767 | +* Function: AIX_handle_read_mirror_drives | |
2768 | +* | |
2769 | +* b_end_io callback for a mirrored read. A failed or stale read is | |
2770 | +* queued for retry while copies remain; else the master bh completes. | |
2771 | +*****************************************************/ | |
2772 | +static void | |
2773 | +AIX_handle_read_mirror_drives(struct buffer_head *bh, int uptodate) | |
2774 | +{ | |
2775 | +	struct aix_logical_volume *volume; | |
2776 | +	struct evms_logical_node *node; | |
2777 | +	struct aix_mirror_bh *tmp_bh; | |
2778 | +	kdev_t tmp_b_rdev; | |
2779 | +	u32 count, le = 0; | |
2780 | + | |
2781 | +	tmp_bh = (struct aix_mirror_bh *) bh->b_private; | |
2782 | +	tmp_b_rdev = tmp_bh->master_bh->b_rdev; | |
2783 | +	volume = (struct aix_logical_volume *) tmp_bh->node->private; | |
2784 | +	node = tmp_bh->node; | |
2785 | +	le = tmp_bh->le; | |
2786 | + | |
2787 | +	LOG_DEBUG("AHRMD node:%p bh_flags:%lu uptodate:%d mirror_copies:%d \n", | |
2788 | +		  node, bh->b_state, uptodate, volume->mirror_copies); | |
2789 | + | |
2790 | +	// Fetch the state of the physical partition this read was sent to. | |
2791 | +	// The primary map is the fallback, so "count" is always defined | |
2792 | +	// even if iteration holds an unexpected value. | |
2793 | +	count = volume->le_to_pe_map[le].pp_state; | |
2794 | + | |
2795 | +	switch (tmp_bh->iteration) { | |
2796 | +	case AIX_FIRST_MIRROR: | |
2797 | +		if (volume->le_to_pe_map_mir1) | |
2798 | +			count = volume->le_to_pe_map_mir1[le].pp_state; | |
2799 | +		break; | |
2800 | + | |
2801 | +	case AIX_MAX_MIRRORS: | |
2802 | +		if (volume->le_to_pe_map_mir2) | |
2803 | +			count = volume->le_to_pe_map_mir2[le].pp_state; | |
2804 | +		break; | |
2805 | +	} | |
2806 | + | |
2807 | +	// Data read from a stale partition cannot be trusted: treat it | |
2808 | +	// as a failed read so another copy is tried if one remains. | |
2809 | +	if (count == (AIX_LVM_LVSTALE + AIX_LVM_LVDEFINED)) { | |
2810 | +		uptodate = 0; | |
2811 | +		count = 0; | |
2812 | +	} | |
2813 | + | |
2814 | +	if (!uptodate && tmp_bh->iteration < volume->mirror_copies) { | |
2815 | +		AIX_evms_cs_notify_lv_io_error(node); | |
2816 | +		AIX_reschedule_retry(tmp_bh); | |
2817 | +	} else { | |
2818 | +		tmp_bh->master_bh->b_end_io(tmp_bh->master_bh, uptodate); | |
2819 | +		evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh); | |
2820 | +		evms_cs_volume_request_in_progress(tmp_b_rdev, | |
2821 | +						   AIX_DECREMENT_REQUEST, | |
2822 | +						   &count); | |
2823 | +	} | |
2824 | +} | |
2825 | + | |
2826 | +/**************************************************** | |
2827 | +* Temporary stand-in until a common EVMS notification | |
2828 | +* function exists. It only logs a critical message naming | |
2829 | +* the node and its volume, and always returns 0. | |
2830 | +* | |
2831 | +*****************************************************/ | |
2832 | +static int | |
2833 | +AIX_evms_cs_notify_lv_io_error(struct evms_logical_node *node) | |
2834 | +{ | |
2835 | +	struct aix_logical_volume *lv = | |
2836 | +	    (struct aix_logical_volume *) node->private; | |
2837 | + | |
2838 | +	LOG_CRITICAL("Notify_ERROR !! node:%p volume->lv_status:%d volume->name:[%s]\n", | |
2839 | +		     node, lv->lv_status, lv->name); | |
2840 | + | |
2841 | +	return 0; | |
2842 | +} | |
2843 | + | |
2844 | +/* Function: lvm_cleanup | |
2845 | + * | |
2846 | + * Walk AIXVolumeGroupList and release data that is not needed once | |
2847 | + * discovery is over. At present that is each group's AIXvgh header | |
2848 | + * copy, which is freed and reset to NULL. The groups themselves, | |
2849 | + * their logical volumes and their evms_logical_node structures are | |
2850 | + * all kept for run-time use; no group is deleted here. | |
2851 | + * | |
2852 | + * Always returns 0. | |
2853 | + */ | |
2854 | +static int | |
2855 | +lvm_cleanup(void) | |
2856 | +{ | |
2857 | +	struct aix_volume_group *vg; | |
2858 | + | |
2859 | +	for (vg = AIXVolumeGroupList; vg; vg = vg->next) { | |
2860 | +		// Nothing to release for a group that holds no | |
2861 | +		// header copy. | |
2862 | +		if (!vg->AIXvgh) { | |
2863 | +			continue; | |
2864 | +		} | |
2865 | + | |
2866 | +		kfree(vg->AIXvgh); | |
2867 | +		vg->AIXvgh = NULL; | |
2868 | +	} | |
2869 | + | |
2870 | +	return 0; | |
2871 | +} | |
2872 | + | |
2873 | +/**************************************************** | |
2874 | +* Function: AIX_copy_header_info | |
2875 | +* | |
2876 | +* Copy the volume group header fields read from disk | |
2877 | +* (AIXvgh2) into the caller's copy (AIXvgh) so they can | |
2878 | +* be used later. | |
2879 | +* | |
2880 | +* Returns 0, or -ENOMEM when AIXvgh is NULL. | |
2881 | +*****************************************************/ | |
2882 | +static int | |
2883 | +AIX_copy_header_info(struct vg_header *AIXvgh, struct vg_header *AIXvgh2) | |
2884 | +{ | |
2885 | +	LOG_DEBUG("CHI AIXvgh:%p AIXvgh2:%p\n", AIXvgh, AIXvgh2); | |
2886 | + | |
2887 | +	// Without a destination there is nothing to fill in. | |
2888 | +	if (!AIXvgh) | |
2889 | +		return -ENOMEM; | |
2890 | + | |
2891 | +	// Timestamp and identifier of the volume group. | |
2892 | +	AIXvgh->vg_timestamp.tv_sec = AIXvgh2->vg_timestamp.tv_sec; | |
2893 | +	AIXvgh->vg_timestamp.tv_nsec = AIXvgh2->vg_timestamp.tv_nsec; | |
2894 | +	AIXvgh->vg_id.word1 = AIXvgh2->vg_id.word1; | |
2895 | +	AIXvgh->vg_id.word2 = AIXvgh2->vg_id.word2; | |
2896 | +	AIXvgh->vg_id.word3 = AIXvgh2->vg_id.word3; | |
2897 | +	AIXvgh->vg_id.word4 = AIXvgh2->vg_id.word4; | |
2898 | + | |
2899 | +	// Remaining header fields, copied one for one. | |
2900 | +	AIXvgh->numlvs = AIXvgh2->numlvs; | |
2901 | +	AIXvgh->maxlvs = AIXvgh2->maxlvs; | |
2902 | +	AIXvgh->pp_size = AIXvgh2->pp_size; | |
2903 | +	AIXvgh->numpvs = AIXvgh2->numpvs; | |
2904 | +	AIXvgh->total_vgdas = AIXvgh2->total_vgdas; | |
2905 | +	AIXvgh->vgda_size = AIXvgh2->vgda_size; | |
2906 | +	AIXvgh->bigvg = AIXvgh2->bigvg; | |
2907 | +	AIXvgh->quorum = AIXvgh2->quorum; | |
2908 | +	AIXvgh->auto_varyon = AIXvgh2->auto_varyon; | |
2909 | +	AIXvgh->checksum = AIXvgh2->checksum; | |
2910 | +	AIXvgh->bigda_size = AIXvgh2->bigda_size; | |
2911 | + | |
2912 | +	LOG_DEBUG("Returning CHI AIXvgh:%p AIXvgh2:%p\n", AIXvgh, AIXvgh2); | |
2913 | + | |
2914 | +	return 0; | |
2915 | +} | |
2916 | + | |
2917 | +/**************************************************** | |
2918 | +* Function: AIX_free_headers | |
2919 | +* | |
2920 | +* Release the temporary volume group header and trailer | |
2921 | +* buffers. Any of the four pointers may be NULL, in which | |
2922 | +* case it is skipped. | |
2923 | +* | |
2924 | +* Only the buffers are freed: the arguments are passed by | |
2925 | +* value, so the caller's own pointers are not cleared. | |
2926 | +*****************************************************/ | |
2927 | +static void | |
2928 | +AIX_free_headers(struct vg_header *AIXvgh, struct vg_header *AIXvgh2, | |
2929 | +		 struct vg_trailer *AIXvgt, struct vg_trailer *AIXvgt2) | |
2930 | +{ | |
2931 | +	void *buffers[4]; | |
2932 | +	int idx; | |
2933 | + | |
2934 | +	// Gather the buffers in the order they are released. | |
2935 | +	buffers[0] = AIXvgh; | |
2936 | +	buffers[1] = AIXvgh2; | |
2937 | +	buffers[2] = AIXvgt; | |
2938 | +	buffers[3] = AIXvgt2; | |
2939 | + | |
2940 | +	for (idx = 0; idx < 4; idx++) { | |
2941 | +		// Absent buffers are skipped. | |
2942 | +		if (buffers[idx] == NULL) { | |
2943 | +			continue; | |
2944 | +		} | |
2945 | + | |
2946 | +		kfree(buffers[idx]); | |
2947 | +	} | |
2948 | + | |
2949 | +	return; | |
2950 | +} | |
2951 | + | |
2952 | +/**************************************************** | |
2953 | +* Function: AIXiod | |
2954 | +* | |
2955 | +* Kernel thread body that services the mirror read retry | |
2956 | +* list. Each queued AIX_LV_READ request has its iteration | |
2957 | +* advanced and is re-issued to the matching mirror copy. | |
2958 | +* The function returns once the list is empty. | |
2959 | +* | |
2960 | +* This should never run for a read of a non-mirrored LV. | |
2961 | +*****************************************************/ | |
2962 | +static void | |
2963 | +AIXiod(void *data) | |
2964 | +{ | |
2965 | +	struct aix_mirror_bh *retry; | |
2966 | +	struct evms_logical_node *target; | |
2967 | +	unsigned long irq_state; | |
2968 | + | |
2969 | +	for (;;) { | |
2970 | +		// Detach the first queued request, if there is one. | |
2971 | +		spin_lock_irqsave(&AIX_retry_list_lock, irq_state); | |
2972 | +		retry = AIX_retry_list; | |
2973 | +		if (retry) | |
2974 | +			AIX_retry_list = retry->next_r1; | |
2975 | +		spin_unlock_irqrestore(&AIX_retry_list_lock, irq_state); | |
2976 | + | |
2977 | +		if (!retry) | |
2978 | +			return; | |
2979 | + | |
2980 | +		// The entry is off the list now; clear its link as a mark. | |
2981 | +		retry->next_r1 = NULL; | |
2982 | + | |
2983 | +		// Only reads are retried. Any other command is logged | |
2984 | +		// and the request is not processed any further. | |
2985 | +		if (retry->cmd != AIX_LV_READ) { | |
2986 | +			LOG_DEBUG("AIXiod unknown cmd passed to thread:%d\n", | |
2987 | +				  retry->cmd); | |
2988 | +			continue; | |
2989 | +		} | |
2990 | + | |
2991 | +		retry->iteration++; | |
2992 | +		LOG_DEBUG("Report from thread AIXiod READ\n"); | |
2993 | + | |
2994 | +		// Aim the request at the mirror matching the new iteration. | |
2995 | +		if (retry->iteration == AIX_FIRST_MIRROR) { | |
2996 | +			target = retry->mir_node1; | |
2997 | +			retry->bh_req.b_rsector = retry->mir_sector1; | |
2998 | +		} else { | |
2999 | +			target = retry->mir_node2; | |
3000 | +			retry->bh_req.b_rsector = retry->mir_sector2; | |
3001 | +		} | |
3002 | + | |
3003 | +		R_IO(target, &retry->bh_req); | |
3004 | +	} | |
3005 | +} | |
3006 | + | |
3007 | +/**************************************************** | |
3008 | +* Function: AIX_schedule_resync | |
3009 | +* | |
3010 | +* Record resync_volume in the (lazily allocated) resync list entry and | |
3011 | +* wake the resync thread. Does nothing if the entry cannot be | |
3012 | +* allocated. The "force" argument is not used. | |
3013 | +*****************************************************/ | |
3014 | +static void | |
3015 | +AIX_schedule_resync(struct aix_logical_volume *resync_volume, int force) | |
3016 | +{ | |
3017 | +	unsigned long flags; | |
3018 | + | |
3019 | +	LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__, | |
3020 | +		  resync_volume->name); | |
3021 | + | |
3022 | +	spin_lock_irqsave(&AIX_resync_list_lock, flags); | |
3023 | +	if (!AIX_resync_list) { | |
3024 | +		AIX_resync_list = | |
3025 | +		    kmalloc(sizeof (struct aix_resync_struct), GFP_ATOMIC); | |
3026 | +		if (!AIX_resync_list) { | |
3027 | +			// Never leave with the lock held and IRQs disabled. | |
3028 | +			spin_unlock_irqrestore(&AIX_resync_list_lock, flags); | |
3029 | +			return; | |
3030 | +		} | |
3031 | +		memset(AIX_resync_list, 0, sizeof (struct aix_resync_struct)); | |
3032 | +	} | |
3033 | +	AIX_resync_list->resync_vol = resync_volume; | |
3034 | +	AIX_resync_list->next_resync_vol = NULL; | |
3035 | +	spin_unlock_irqrestore(&AIX_resync_list_lock, flags); | |
3036 | + | |
3037 | +	evms_cs_wakeup_thread(AIX_mirror_resync_thread); | |
3038 | +} | |
3039 | + | |
3040 | +/**************************************************** | |
3041 | +* Function: AIXresync | |
3042 | +* | |
3043 | +* Kernel thread body that resynchronizes the mirrors of the volume | |
3044 | +* recorded in AIX_resync_list. It bails out with an error message if | |
3045 | +* no volume is queued, a resync is already in progress, or the volume | |
3046 | +* is not mirrored. Otherwise AIXResyncInProgress is set and the copy | |
3047 | +* is started through AIX_resync_lv_mirrors(). | |
3048 | +*****************************************************/ | |
3049 | +static void | |
3050 | +AIXresync(void *data) | |
3051 | +{ | |
3052 | +	struct aix_logical_volume *volume = NULL; | |
3053 | +	int force = FALSE;	// Currently we don't force a resync of non-stale pe's | |
3054 | + | |
3055 | +	if (AIX_resync_list == NULL) { | |
3056 | +		LOG_ERROR("No Volumes on list to resync\n"); | |
3057 | +		return; | |
3058 | +	} | |
3059 | + | |
3060 | +	volume = AIX_resync_list->resync_vol; | |
3061 | + | |
3062 | +	// Validate before the first dereference (volume->name below). | |
3063 | +	if (!volume) { | |
3064 | +		LOG_ERROR("Invalid volume passed to sync\n"); | |
3065 | +		return; | |
3066 | +	} | |
3067 | + | |
3068 | +	LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__, volume->name); | |
3069 | + | |
3070 | +	if (AIXResyncInProgress) { | |
3071 | +		LOG_ERROR("Unable to resync multiple LVs concurrently %s\n", | |
3072 | +			  volume->name); | |
3073 | +		return; | |
3074 | +	} | |
3075 | + | |
3076 | +	if (volume->mirror_copies == AIX_DEFAULT_MIRRORING) { | |
3077 | +		LOG_ERROR("Unable to resync non-mirrored LV %s \n", | |
3078 | +			  volume->name); | |
3079 | +		return; | |
3080 | +	} | |
3081 | + | |
3082 | +	AIXResyncInProgress = TRUE; | |
3083 | + | |
3084 | +	AIX_resync_lv_mirrors(volume, force); | |
3085 | +} | |
3086 | + | |
3087 | +/**************************************************** | |
3088 | +* Function: AIX_resync_lv_mirrors | |
3089 | +* | |
3090 | +* Walk every logical extent (LE) of a mirrored volume and start a | |
3091 | +* resync for each LE that has a stale copy (or for every LE when | |
3092 | +* "force" is set). The first copy that is not stale is used as the | |
3093 | +* master and is handed, with the other copies as slaves, to | |
3094 | +* AIX_copy_on_read(). | |
3095 | +* | |
3096 | +* The walk stops at the first LE for which AIX_copy_on_read() fails, | |
3097 | +* including an LE that has no usable master copy; the failure is | |
3098 | +* logged and reported through AIX_evms_cs_notify_lv_io_error(). | |
3099 | +* | |
3100 | +* Always returns 0. | |
3101 | +*****************************************************/ | |
3102 | +static int | |
3103 | +AIX_resync_lv_mirrors(struct aix_logical_volume *volume, int force) | |
3104 | +{ | |
3105 | +	struct partition_list_entry *master_part; | |
3106 | +	struct partition_list_entry *slave1_part; | |
3107 | +	struct partition_list_entry *slave2_part; | |
3108 | + | |
3109 | +	u64 master_offset; | |
3110 | +	u64 slave1_offset; | |
3111 | +	u64 slave2_offset; | |
3112 | + | |
3113 | +	int stale0, stale1, stale2;	// per-copy stale flags for the current LE | |
3114 | +	int pp_stale; | |
3115 | +	int i; | |
3116 | + | |
3117 | +	LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__, volume->name); | |
3118 | + | |
3119 | +	for (i = 0; i < volume->num_le; i++) { | |
3120 | + | |
3121 | +		// Start each LE from a clean slate. Values left over from | |
3122 | +		// the previous LE must never reach AIX_copy_on_read(), or a | |
3123 | +		// fully stale LE would be synced from the wrong location. | |
3124 | +		master_part = NULL; | |
3125 | +		slave1_part = NULL; | |
3126 | +		slave2_part = NULL; | |
3127 | +		master_offset = 0; | |
3128 | +		slave1_offset = 0; | |
3129 | +		slave2_offset = 0; | |
3130 | + | |
3131 | +		// Work out which copies of this LE are stale. A mirror map | |
3132 | +		// that does not exist is never dereferenced. | |
3133 | +		stale0 = | |
3134 | +		    (volume->le_to_pe_map[i].pp_state & AIX_LVM_LVSTALE) != 0; | |
3135 | +		stale1 = volume->le_to_pe_map_mir1 != NULL && | |
3136 | +		    (volume->le_to_pe_map_mir1[i].pp_state & AIX_LVM_LVSTALE) != 0; | |
3137 | +		stale2 = volume->le_to_pe_map_mir2 != NULL && | |
3138 | +		    (volume->le_to_pe_map_mir2[i].pp_state & AIX_LVM_LVSTALE) != 0; | |
3139 | +		pp_stale = stale0 || stale1 || stale2; | |
3140 | + | |
3141 | +		LOG_DEBUG("Function %s pp_stale:%d force:%d \n", __FUNCTION__, | |
3142 | +			  pp_stale, force); | |
3143 | + | |
3144 | +		if (!pp_stale && !force) | |
3145 | +			continue; | |
3146 | + | |
3147 | +		// The first non-stale copy becomes the master; the remaining | |
3148 | +		// copies are the slaves it is copied to. | |
3149 | +		if (!stale0) { | |
3150 | +			master_part = volume->le_to_pe_map[i].owning_pv; | |
3151 | +			master_offset = volume->le_to_pe_map[i].pe_sector_offset; | |
3152 | + | |
3153 | +			if (volume->le_to_pe_map_mir1 != NULL) { | |
3154 | +				slave1_part = volume->le_to_pe_map_mir1[i].owning_pv; | |
3155 | +				slave1_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset; | |
3156 | +			} | |
3157 | + | |
3158 | +			if (volume->le_to_pe_map_mir2 != NULL) { | |
3159 | +				slave2_part = volume->le_to_pe_map_mir2[i].owning_pv; | |
3160 | +				slave2_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset; | |
3161 | +			} | |
3162 | +		} else if (volume->le_to_pe_map_mir1 != NULL && !stale1) { | |
3163 | +			master_part = volume->le_to_pe_map_mir1[i].owning_pv; | |
3164 | +			master_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset; | |
3165 | + | |
3166 | +			slave1_part = volume->le_to_pe_map[i].owning_pv; | |
3167 | +			slave1_offset = volume->le_to_pe_map[i].pe_sector_offset; | |
3168 | + | |
3169 | +			if (volume->le_to_pe_map_mir2 != NULL) { | |
3170 | +				slave2_part = volume->le_to_pe_map_mir2[i].owning_pv; | |
3171 | +				slave2_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset; | |
3172 | +			} | |
3173 | +		} else if (volume->le_to_pe_map_mir2 != NULL && !stale2) { | |
3174 | +			master_part = volume->le_to_pe_map_mir2[i].owning_pv; | |
3175 | +			master_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset; | |
3176 | + | |
3177 | +			slave1_part = volume->le_to_pe_map[i].owning_pv; | |
3178 | +			slave1_offset = volume->le_to_pe_map[i].pe_sector_offset; | |
3179 | + | |
3180 | +			if (volume->le_to_pe_map_mir1 != NULL) { | |
3181 | +				slave2_part = volume->le_to_pe_map_mir1[i].owning_pv; | |
3182 | +				slave2_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset; | |
3183 | +			} | |
3184 | +		} | |
3185 | +		// If every copy is stale no master was chosen; the NULL | |
3186 | +		// master makes AIX_copy_on_read() reject the request. | |
3187 | + | |
3188 | +		if (AIX_copy_on_read(volume, master_part, slave1_part, slave2_part, | |
3189 | +				     master_offset, slave1_offset, slave2_offset, | |
3190 | +				     volume->pe_size, i)) { | |
3191 | + | |
3192 | +			LOG_CRITICAL("ReSync of logical Volume %s FAILED !!\n", | |
3193 | +				     volume->name); | |
3194 | +			AIX_evms_cs_notify_lv_io_error(volume->volume_node); | |
3195 | +			break; | |
3196 | +		} | |
3197 | +	} | |
3198 | + | |
3199 | +	return 0; | |
3200 | +} | |
3201 | + | |
3202 | +/**************************************************** | |
3203 | +* Function: AIX_copy_on_read | |
3204 | +* | |
3205 | +* Start the resync of one logical extent: build the sync buffer heads | |
3206 | +* with AIX_alloc_sbh() and issue the read of the master copy. The | |
3207 | +* read completes through the b_end_io hook set up by AIX_alloc_sbh(). | |
3208 | +* | |
3209 | +* master_part and slave1_part are required; slave2_part may be NULL. | |
3210 | +* | |
3211 | +* Returns 0 once the read is issued, -EINVAL for a missing master or | |
3212 | +* first slave, or -ENOMEM if no buffer heads could be allocated. | |
3213 | +*****************************************************/ | |
3214 | +static int | |
3215 | +AIX_copy_on_read(struct aix_logical_volume *volume, | |
3216 | +		 struct partition_list_entry *master_part, | |
3217 | +		 struct partition_list_entry *slave1_part, | |
3218 | +		 struct partition_list_entry *slave2_part, | |
3219 | +		 u64 master_offset, | |
3220 | +		 u64 slave1_offset, u64 slave2_offset, u32 pe_size, int le) | |
3221 | +{ | |
3222 | +	unsigned long flags; | |
3223 | +	struct aix_mirror_bh *tmp_bh = NULL; | |
3224 | + | |
3225 | +	// At least two good partitions are needed, so only slave2_part | |
3226 | +	// is allowed to be missing. | |
3227 | +	if (!master_part || !slave1_part) { | |
3228 | +		LOG_ERROR("Invalid partitions for resync master part:%p slave1_part:%p slave2_part:%p\n", | |
3229 | +			  master_part, slave1_part, slave2_part); | |
3230 | +		return -EINVAL; | |
3231 | +	} | |
3232 | + | |
3233 | +	// slave2_part is optional: log -1 as its PV number when absent | |
3234 | +	// rather than dereferencing a NULL pointer. | |
3235 | +	LOG_DEBUG("Function %s volume:%s master_part:%d, slave1_part:%d, slave2_part:%d master_offset:" | |
3236 | +		  PFU64 ", slave1_offset:" PFU64 " slave2_offset:" PFU64 ", \n", | |
3237 | +		  __FUNCTION__, volume->name, master_part->pv_number, | |
3238 | +		  slave1_part->pv_number, | |
3239 | +		  slave2_part ? (int) slave2_part->pv_number : -1, | |
3240 | +		  master_offset, slave1_offset, slave2_offset); | |
3241 | + | |
3242 | +	LOG_DEBUG("pe_size:%d le:%d\n", pe_size, le); | |
3243 | + | |
3244 | +	tmp_bh = | |
3245 | +	    AIX_alloc_sbh(volume, master_part, slave1_part, slave2_part, | |
3246 | +			  master_offset, slave1_offset, slave2_offset, pe_size); | |
3247 | + | |
3248 | +	// On failure there is no buffer head to report an I/O error on, | |
3249 | +	// so just tell the caller. | |
3250 | +	if (!tmp_bh) | |
3251 | +		return -ENOMEM; | |
3252 | + | |
3253 | +	spin_lock_irqsave(&AIX_resync_pp_lock, flags); | |
3254 | + | |
3255 | +	LOG_DEBUG("Function:%s kicking off read node:%p\n", __FUNCTION__, | |
3256 | +		  master_part->logical_node); | |
3257 | + | |
3258 | +	R_IO(master_part->logical_node, &tmp_bh->bh_req); | |
3259 | + | |
3260 | +	spin_unlock_irqrestore(&AIX_resync_pp_lock, flags); | |
3261 | +	return 0; | |
3262 | +} | |
3263 | + | |
3264 | +/**************************************************** | |
3265 | +* Function: AIX_alloc_sbh | |
3266 | +* | |
3267 | +* Alloc any buffer heads from the pool and return a linked list | |
3268 | +* | |
3269 | +* | |
3270 | +*****************************************************/ | |
3271 | +static struct aix_mirror_bh * | |
3272 | +AIX_alloc_sbh(struct aix_logical_volume *volume, | |
3273 | + struct partition_list_entry *master_part, | |
3274 | + struct partition_list_entry *slave1_part, | |
3275 | + struct partition_list_entry *slave2_part, | |
3276 | + u64 master_offset, | |
3277 | + u64 slave1_offset, u64 slave2_offset, u32 pe_size) | |
3278 | +{ | |
3279 | + struct aix_mirror_bh *tmp_bh = NULL, *head_bh = NULL; | |
3280 | + unsigned long flags; | |
3281 | + | |
3282 | + // Builds the bh chain for one resync pass and records the partitions and | |
3283 | + // offsets in AIX_resync_list. The head bh covers AIX_RESYNC_BLOCKSIZE bytes | |
3284 | + // at master_offset; one copy of its request is chained via next_r1 per mirror | |
3285 | + // (a second copy only when mirror_copies == AIX_MAX_MIRRORS). Returns the head, | |
3286 | + // or NULL on failure after releasing what was allocated. pe_size is not used. | |
3287 | + | |
3288 | + LOG_DEBUG("Function:%s Enter\n", __FUNCTION__); | |
3289 | + | |
3290 | + head_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE); | |
3291 | + if (!head_bh) { | |
3292 | + LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", __LINE__); | |
3293 | + return NULL; | |
3294 | + } | |
3295 | + // Update buffer so we block on a read/write on the normal IO path | |
3296 | + // if we're trying to sync the same sector on the disk | |
3297 | + // We don't want to block if it's different sectors | |
3298 | + // The lock guards AIX_resync_list only and is released before the | |
3299 | + // allocations below, which may sleep (GFP_NOIO, EVMS_BLOCKABLE). | |
3300 | + spin_lock_irqsave(&AIX_resync_list_lock, flags); | |
3301 | + AIX_resync_list->master_part = master_part; | |
3302 | + AIX_resync_list->slave1_part = slave1_part; | |
3303 | + AIX_resync_list->slave2_part = slave2_part; | |
3304 | + AIX_resync_list->master_offset = master_offset; | |
3305 | + AIX_resync_list->slave1_offset = slave1_offset; | |
3306 | + AIX_resync_list->slave2_offset = slave2_offset; | |
3307 | + spin_unlock_irqrestore(&AIX_resync_list_lock, flags); | |
3308 | + | |
3309 | + head_bh->bh_req.b_data = kmalloc(AIX_RESYNC_BLOCKSIZE + 1, GFP_NOIO); | |
3310 | + if (!head_bh->bh_req.b_data) { | |
3311 | + evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh); | |
3312 | + LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", __LINE__); | |
3313 | + return NULL; | |
3314 | + } | |
3315 | + memset(head_bh->bh_req.b_data, 0, AIX_RESYNC_BLOCKSIZE + 1); | |
3316 | + head_bh->remaining = (atomic_t) ATOMIC_INIT(0); | |
3317 | + head_bh->bh_req.b_rsector = master_offset; | |
3318 | + head_bh->bh_req.b_size = AIX_RESYNC_BLOCKSIZE; | |
3319 | + head_bh->sync_flag = AIX_SYNC_INCOMPLETE; | |
3320 | + head_bh->bh_req.b_end_io = AIX_sync_mirrored_partitions; | |
3321 | + head_bh->bh_req.b_page = virt_to_page(head_bh->bh_req.b_data); | |
3322 | + head_bh->bh_req.b_state = 0; | |
3323 | + set_bit(BH_Dirty, &head_bh->bh_req.b_state); | |
3324 | + set_bit(BH_Lock, &head_bh->bh_req.b_state); | |
3325 | + set_bit(BH_Req, &head_bh->bh_req.b_state); | |
3326 | + set_bit(BH_Mapped, &head_bh->bh_req.b_state); | |
3327 | + head_bh->master_bh = NULL; | |
3328 | + head_bh->mirror_bh_list = NULL; | |
3329 | + head_bh->next_r1 = NULL; | |
3330 | + | |
3331 | + tmp_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE); | |
3332 | + if (!tmp_bh) { | |
3333 | + LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", __LINE__); | |
3334 | + goto fail; | |
3335 | + } | |
3336 | + | |
3337 | + head_bh->next_r1 = tmp_bh; | |
3338 | + // Copy the request itself; &head_bh->bh_req is right wherever bh_req sits in the struct | |
3339 | + memcpy(&tmp_bh->bh_req, &head_bh->bh_req, sizeof (struct buffer_head)); | |
3340 | + tmp_bh->remaining = (atomic_t) ATOMIC_INIT(0); | |
3341 | + tmp_bh->bh_req.b_end_io = NULL; | |
3342 | + tmp_bh->next_r1 = NULL; | |
3343 | + | |
3344 | + if (volume->mirror_copies == AIX_MAX_MIRRORS) { | |
3345 | + tmp_bh->next_r1 = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE); | |
3346 | + if (!tmp_bh->next_r1) { | |
3347 | + LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n", __LINE__); | |
3348 | + goto fail; | |
3349 | + } | |
3350 | + memcpy(&tmp_bh->next_r1->bh_req, &head_bh->bh_req, sizeof (struct buffer_head)); | |
3351 | + tmp_bh->next_r1->bh_req.b_end_io = NULL; | |
3352 | + tmp_bh->next_r1->remaining = (atomic_t) ATOMIC_INIT(0); | |
3353 | + tmp_bh->next_r1->next_r1 = NULL; | |
3354 | + } | |
3355 | + | |
3356 | + init_waitqueue_head(&head_bh->bh_req.b_wait); | |
3357 | + LOG_DEBUG("Function:%s Exit head_bh:%p\n", __FUNCTION__, head_bh); | |
3358 | + return head_bh; | |
3359 | + | |
3360 | +fail: | |
3361 | + // Reached only after b_data was allocated; next_r1 is NULL or the first mirror bh | |
3362 | + kfree(head_bh->bh_req.b_data); | |
3363 | + if (head_bh->next_r1) | |
3364 | + evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh->next_r1); | |
3365 | + evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh); | |
3366 | + return NULL; | |
3367 | +} | |
3367 | + | |
3368 | +/**************************************************** | |
3369 | +* Function: AIX_sync_mirrored_partitions | |
3370 | +* | |
3371 | +* b_end_io callback installed on the head bh by AIX_alloc_sbh; bh->b_private | |
3372 | +* is used as the head aix_mirror_bh. Issues W_IO for each mirror bh chained | |
3373 | +* through next_r1, advances the offsets by AIX_RESYNC_BLOCKSIZE and re-issues | |
3374 | +* the master R_IO while sync_flag is AIX_SYNC_INCOMPLETE. Once sync_flag reads | |
3375 | +* as zero the whole chain and AIX_resync_list are released. | |
3376 | +*****************************************************/ | |
3377 | +static void | |
3378 | +AIX_sync_mirrored_partitions(struct buffer_head *bh, int uptodate) | |
3379 | +{ | |
3380 | + struct aix_mirror_bh *tmp_bh, *head_bh; | |
3381 | + struct aix_mirror_bh *next_bh; | |
3382 | + | |
3383 | + // NOTE(review): b_private is not set in AIX_alloc_sbh - confirm the caller stores the head bh there | |
3384 | + head_bh = tmp_bh = (struct aix_mirror_bh *) bh->b_private; | |
3385 | + | |
3386 | + LOG_DEBUG("Function:%s Enter uptodate:%d\n", __FUNCTION__, uptodate); | |
3387 | + | |
3388 | + if (!uptodate) { | |
3389 | + | |
3390 | + AIX_evms_cs_notify_lv_io_error(tmp_bh->node); | |
3391 | + } | |
3392 | + | |
3393 | + tmp_bh = head_bh->next_r1; | |
3394 | + | |
3395 | + LOG_DEBUG("Function:%s line:%d write to mirror:%p\n", __FUNCTION__, | |
3396 | + __LINE__, tmp_bh); | |
3397 | + | |
3398 | + if (tmp_bh) { | |
3399 | + W_IO(tmp_bh->node, &tmp_bh->bh_req); | |
3400 | + AIX_get_set_mirror_offset(tmp_bh, AIX_SLAVE_1, | |
3401 | + AIX_RESYNC_BLOCKSIZE); | |
3402 | + } | |
3403 | + // tmp_bh is NULL here when the chain has no mirror bh, so it must not be dereferenced blindly | |
3404 | + tmp_bh = tmp_bh ? tmp_bh->next_r1 : NULL; | |
3405 | + LOG_DEBUG("Function:%s line:%d write to mirror:%p\n", __FUNCTION__, | |
3406 | + __LINE__, tmp_bh); | |
3407 | + | |
3408 | + if (tmp_bh) { | |
3409 | + W_IO(tmp_bh->node, &tmp_bh->bh_req); | |
3410 | + AIX_get_set_mirror_offset(tmp_bh, AIX_SLAVE_2, | |
3411 | + AIX_RESYNC_BLOCKSIZE); | |
3412 | + } | |
3413 | + | |
3414 | + LOG_DEBUG("Function:%s line:%d read from master:%p\n", __FUNCTION__, | |
3415 | + __LINE__, head_bh); | |
3416 | + | |
3417 | + if (head_bh && head_bh->sync_flag) { | |
3418 | + AIX_get_set_mirror_offset(head_bh, AIX_MASTER, | |
3419 | + AIX_RESYNC_BLOCKSIZE); | |
3420 | + if (head_bh->sync_flag == AIX_SYNC_INCOMPLETE) { | |
3421 | + R_IO(head_bh->node, &head_bh->bh_req); | |
3422 | + } | |
3423 | + } | |
3424 | + | |
3425 | + LOG_DEBUG("Function:%s line:%d head_bh->sync_flag:%d\n", __FUNCTION__, | |
3426 | + __LINE__, head_bh->sync_flag); | |
3427 | + | |
3428 | + if (!head_bh->sync_flag) { | |
3429 | + // Give every bh of the chain back, reading next_r1 before the bh is released | |
3430 | + tmp_bh = head_bh; | |
3431 | + while (tmp_bh != NULL) { | |
3432 | + next_bh = tmp_bh->next_r1; | |
3433 | + evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh); | |
3434 | + tmp_bh = next_bh; | |
3435 | + } | |
3436 | + | |
3437 | + AIXResyncInProgress = FALSE; | |
3438 | +/* evms_cs_volume_request_in_progress(tmp_bh->bh_req.b_rdev, | |
3439 | + AIX_DECREMENT_REQUEST, | |
3440 | + &count); */ | |
3441 | + if (AIX_resync_list) { | |
3442 | + kfree(AIX_resync_list); | |
3443 | + AIX_resync_list = NULL; // do not leave a dangling global behind | |
3444 | + } | |
3445 | + } | |
3446 | + | |
3447 | + return; | |
3448 | +} | |
3448 | + | |
3449 | +/**************************************************** | |
3450 | +* Function: AIX_get_set_mirror_offset | |
3451 | +* Advances tmp_bh->bh_req.b_rsector by offset (index is not used). When the | |
3452 | +* result lies beyond tmp_bh->node->total_vsectors the bh is marked | |
3453 | +* AIX_SYNC_COMPLETE and -EIO is returned. Otherwise the AIX_resync_list | |
3454 | +* offset of each partition whose logical_node is tmp_bh->node is advanced | |
3455 | +* as well and 0 is returned. Returns -EINVAL when tmp_bh is NULL. | |
3456 | +*****************************************************/ | |
3457 | +static int | |
3458 | +AIX_get_set_mirror_offset(struct aix_mirror_bh *tmp_bh, int index, int offset) | |
3459 | +{ | |
3460 | + unsigned long flags; // type required by spin_lock_irqsave() | |
3461 | + | |
3462 | + if (!tmp_bh) { | |
3463 | + return -EINVAL; | |
3464 | + } | |
3465 | + | |
3466 | + LOG_DEBUG("Function:%s Enter offset:%d\n", __FUNCTION__, offset); | |
3467 | + // Step by offset only. NOTE(review): callers pass AIX_RESYNC_BLOCKSIZE - confirm it is in b_rsector units | |
3468 | + tmp_bh->bh_req.b_rsector += offset; | |
3469 | + | |
3470 | + if (tmp_bh->bh_req.b_rsector > tmp_bh->node->total_vsectors) { | |
3471 | + tmp_bh->sync_flag = AIX_SYNC_COMPLETE; | |
3472 | + return -EIO; | |
3473 | + } | |
3474 | + // Update buffer so we block on a read/write on the normal IO path | |
3475 | + // if we're trying to sync the same sector on the disk | |
3476 | + // We don't want to block if it's different sectors | |
3477 | + // Slave pointers are tested first; presumably NULL with fewer mirror copies - verify against caller | |
3478 | + spin_lock_irqsave(&AIX_resync_list_lock, flags); | |
3479 | + | |
3480 | + if (AIX_resync_list->master_part->logical_node == tmp_bh->node) { | |
3481 | + AIX_resync_list->master_offset += offset; | |
3482 | + } | |
3483 | + if (AIX_resync_list->slave1_part && | |
3484 | + AIX_resync_list->slave1_part->logical_node == tmp_bh->node) { | |
3485 | + AIX_resync_list->slave1_offset += offset; | |
3486 | + } | |
3487 | + if (AIX_resync_list->slave2_part && | |
3488 | + AIX_resync_list->slave2_part->logical_node == tmp_bh->node) { | |
3489 | + AIX_resync_list->slave2_offset += offset; | |
3490 | + } | |
3491 | + | |
3492 | + spin_unlock_irqrestore(&AIX_resync_list_lock, flags); | |
3493 | + | |
3494 | + return 0; | |
3495 | + | |
3496 | +} | |
3497 | + | |
3498 | +static int AIX_pvh_data_posn(u32 vgda_psn, u32 * pvh_posn, struct partition_list_entry *partition, u32 numpvs) | |
3499 | +{ | |
3500 | + struct partition_list_entry * pv; | |
3501 | + struct pv_header * AIXpvh; | |
3502 | + int posn = 0; | |
3503 | + int num_pps; | |
3504 | + int tmp,i; | |
3505 | + | |
3506 | + // Stores in pvh_posn[n] the sector computed for the header of PV number n | |
3507 | + // (n = 1..numpvs; pvh_posn[0] is set to 0). Returns 0, -ENOMEM, -EIO when | |
3508 | + // INIT_IO fails, or -EINVAL when the list has no entry for a PV number. | |
3509 | + LOG_DEBUG("APDP - vgda_psn:%d numpvs:%d \n", vgda_psn, numpvs); | |
3510 | + AIXpvh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL); | |
3511 | + if (!AIXpvh) { | |
3512 | + return -ENOMEM; | |
3513 | + } | |
3514 | + // Clear the whole buffer that was allocated, not just sizeof(struct pv_header) | |
3515 | + memset(AIXpvh, 0 , AIX_SECTOR_SIZE); | |
3516 | + | |
3517 | + // Adjust this because when AIX VGs/Volumes are created on Intel platforms, the | |
3518 | + // pp_count could be anything since we don't give up the entire physical drive. | |
3519 | + // This is for calculation purposes only. | |
3520 | + | |
3521 | + pvh_posn[0] = 0; | |
3522 | + | |
3523 | + for (i = 1; i <= numpvs; i++) { | |
3524 | + // Locate the list entry with PV number i; stop cleanly if the list ends first | |
3525 | + for (pv = partition; pv && pv->pv_number != i; pv = pv->next ); | |
3526 | + if (!pv) { | |
3527 | + kfree(AIXpvh); | |
3528 | + return -EINVAL; | |
3529 | + } | |
3530 | + | |
3531 | + // pp_count is whatever the previous pass left in AIXpvh (zero on the first pass) | |
3532 | + LOG_DEBUG("APDP line:%d pp_count:%d \n", __LINE__, AIXpvh->pp_count); | |
3533 | + num_pps = AIXpvh->pp_count; | |
3534 | + num_pps++; // Account for the pv_header on the front | |
3535 | + | |
3536 | + while ((num_pps * sizeof(struct pp_entries)) % AIX_SECTOR_SIZE) { | |
3537 | + LOG_EXTRA("num_pps:%d \n", num_pps); | |
3538 | + num_pps++; | |
3539 | + } | |
3540 | + | |
3541 | + tmp = (num_pps * sizeof(struct pp_entries)) / AIX_SECTOR_SIZE; | |
3542 | + LOG_DEBUG("APDP tmp:%d num_pps:%d \n", tmp,num_pps); | |
3543 | + | |
3544 | + posn = ((vgda_psn + PSN_PPH_OFFSET) + ((pv->pv_number -1) * tmp)); | |
3545 | + pvh_posn[pv->pv_number] = posn; | |
3546 | + | |
3547 | + if (INIT_IO(pv->logical_node, 0, posn, 1, AIXpvh)) { | |
3548 | + kfree(AIXpvh); | |
3549 | + return -EIO; | |
3550 | + } | |
3551 | + } | |
3552 | + kfree(AIXpvh); | |
3553 | + return 0; | |
3554 | +} | |
3555 | + | |
3556 | +/**************************************************** | |
3557 | +* Function: AIX_volume_group_dump | |
3558 | +* | |
3559 | +* This is for debug purposes and will walk the volume group list | |
3560 | +* and LV's within the volume groups | |
3561 | +* | |
3562 | +* It can be called at anytime however the output to the display is large | |
3563 | +* Always returns 0. Only compiled when EVMS_AIX_DEBUG is defined. | |
3564 | +*****************************************************/ | |
3565 | +#ifdef EVMS_AIX_DEBUG | |
3566 | +static int | |
3567 | +AIX_volume_group_dump(void) | |
3568 | +{ | |
3569 | + struct aix_volume_group *AIXVGLDebugPtr; | |
3570 | + struct partition_list_entry *DebugPartitionList; | |
3571 | + struct aix_logical_volume *DebugLVList; | |
3572 | + int i, lv_count; | |
3573 | + | |
3574 | + AIXVGLDebugPtr = AIXVolumeGroupList; | |
3575 | + | |
3576 | + if (!AIXVGLDebugPtr) { | |
3577 | + LOG_DEBUG("***********************************************\n"); | |
3578 | + LOG_DEBUG("ERROR Nothing built in the list to check !!! \n"); | |
3579 | + LOG_DEBUG("***********************************************\n"); | |
3580 | + return 0; | |
3581 | + } | |
3582 | + | |
3583 | + LOG_DEBUG("*********************************************** \n"); | |
3584 | + LOG_DEBUG("Begin Volume Group Dump \n"); | |
3585 | + LOG_DEBUG("*********************************************** \n"); | |
3586 | + | |
3587 | + while (AIXVGLDebugPtr) { | |
3588 | + | |
3589 | + LOG_DEBUG("vg_number %x\n", AIXVGLDebugPtr->vg_id.word2); | |
3590 | + LOG_DEBUG("numpsrtitions %d\n", AIXVGLDebugPtr->partition_count); | |
3591 | + LOG_DEBUG("numlvs %d\n", AIXVGLDebugPtr->numlvs); | |
3592 | + LOG_DEBUG("hard_sect_size %d\n", AIXVGLDebugPtr->hard_sect_size); | |
3593 | + LOG_DEBUG("block_size %d\n", AIXVGLDebugPtr->block_size); | |
3594 | + LOG_DEBUG("flags %d\n", AIXVGLDebugPtr->flags); | |
3595 | +// LOG_DEBUG("lv_max %d\n", AIXVGLDebugPtr->lv_max); | |
3596 | + LOG_DEBUG("pe_size %d\n", AIXVGLDebugPtr->pe_size); | |
3597 | + LOG_DEBUG("CleanVGInfo %d\n", AIXVGLDebugPtr->CleanVGInfo); | |
3598 | + | |
3599 | + DebugPartitionList = AIXVGLDebugPtr->partition_list; | |
3600 | + | |
3601 | + LOG_DEBUG("********* Begin Volume Partition Dump ********* \n"); | |
3602 | + | |
3603 | + if (!DebugPartitionList) { | |
3604 | + LOG_DEBUG("No partitions to check !! \n"); | |
3605 | + } | |
3606 | + | |
3607 | + while (DebugPartitionList) { | |
3608 | + LOG_DEBUG("logical_node %p\n", | |
3609 | + DebugPartitionList->logical_node); | |
3610 | + LOG_DEBUG("pv_number %d\n", | |
3611 | + DebugPartitionList->pv_number); | |
3612 | + LOG_DEBUG("block_size %d\n", | |
3613 | + DebugPartitionList->block_size); | |
3614 | + LOG_DEBUG("hard_sect_size %d\n", | |
3615 | + DebugPartitionList->hard_sect_size); | |
3616 | + LOG_DEBUG("-------------------------------------------------------------\n"); | |
3617 | + DebugPartitionList = DebugPartitionList->next; | |
3618 | + } | |
3619 | + | |
3620 | + LOG_DEBUG("********* End Volume Partition Dump **********\n"); | |
3621 | + | |
3622 | + LOG_DEBUG("********** Begin Logical Volume Partition Dump **********\n"); | |
3623 | + | |
3624 | + lv_count = 0; | |
3625 | + | |
3626 | + // volume_list is treated as possibly sparse: every one of the | |
3627 | + // LVM_MAXLVS slots is examined instead of stopping at the first | |
3628 | + // NULL slot, and empty slots are skipped. | |
3629 | + | |
3630 | + for (i = 0; i < LVM_MAXLVS; i++) { | |
3631 | + DebugLVList = AIXVGLDebugPtr->volume_list[i]; | |
3632 | + | |
3633 | + if (DebugLVList) { | |
3634 | + lv_count++; | |
3635 | + LOG_DEBUG("volume_list # %d \n", i); | |
3636 | + LOG_DEBUG("lv_number %d \n", | |
3637 | + DebugLVList->lv_number); | |
3638 | + LOG_DEBUG("LV name %s \n", | |
3639 | + DebugLVList->name); | |
3640 | + LOG_DEBUG("lv_size " PFU64 " \n", | |
3641 | + DebugLVList->lv_size); | |
3642 | + LOG_DEBUG("lv_access %d \n", | |
3643 | + DebugLVList->lv_access); | |
3644 | + LOG_DEBUG("lv_status %d \n", | |
3645 | + DebugLVList->lv_status); | |
3646 | +// LOG_DEBUG("lv_minor %d \n", | |
3647 | +// DebugLVList->lv_minor); | |
3648 | + LOG_DEBUG("mirror_copies %d \n", | |
3649 | + DebugLVList->mirror_copies); | |
3650 | +// LOG_DEBUG("mirror_number %d \n", | |
3651 | +// DebugLVList->mirror_number); | |
3652 | + LOG_DEBUG("stripes %d \n", | |
3653 | + DebugLVList->stripes); | |
3654 | + LOG_DEBUG("stripe_size %d \n", | |
3655 | + DebugLVList->stripe_size); | |
3656 | + LOG_DEBUG("stripe_size_shift%d \n", | |
3657 | + DebugLVList->stripe_size_shift); | |
3658 | + LOG_DEBUG("pe_size %d \n", | |
3659 | + DebugLVList->pe_size); | |
3660 | + LOG_DEBUG("pe_size_shift %d \n", | |
3661 | + DebugLVList->pe_size_shift); | |
3662 | + LOG_DEBUG("num_le %d \n", | |
3663 | + DebugLVList->num_le); | |
3664 | +// LOG_DEBUG("new_volume %d \n", | |
3665 | +// DebugLVList->new_volume); | |
3666 | + LOG_DEBUG("group %p \n", | |
3667 | + DebugLVList->group); | |
3668 | + } | |
3669 | + | |
3670 | + } | |
3671 | + if (!lv_count) { | |
3672 | + LOG_DEBUG("No logical volumes to check !! \n"); | |
3673 | + } | |
3674 | + LOG_DEBUG("********** End Logical Volume Partition Dump **********\n"); | |
3675 | + AIXVGLDebugPtr = AIXVGLDebugPtr->next; | |
3676 | + } | |
3677 | + | |
3678 | + LOG_DEBUG("***********************************************\n"); | |
3679 | + LOG_DEBUG("End Volume Group Dump \n"); | |
3680 | + LOG_DEBUG("***********************************************\n"); | |
3681 | + | |
3682 | + return 0; | |
3683 | + | |
3684 | +} | |
3685 | +#endif | |
3686 | diff -Naur linux-2002-09-30/drivers/evms/Config.in evms-2002-09-30/drivers/evms/Config.in | |
3687 | --- linux-2002-09-30/drivers/evms/Config.in Wed Dec 31 18:00:00 1969 | |
3688 | +++ evms-2002-09-30/drivers/evms/Config.in Mon Sep 16 15:55:24 2002 | |
3689 | @@ -0,0 +1,60 @@ | |
3690 | +# | |
3691 | +# Copyright (c) International Business Machines Corp., 2000 | |
3692 | +# | |
3693 | +# This program is free software; you can redistribute it and/or modify | |
3694 | +# it under the terms of the GNU General Public License as published by | |
3695 | +# the Free Software Foundation; either version 2 of the License, or | |
3696 | +# (at your option) any later version. | |
3697 | +# | |
3698 | +# This program is distributed in the hope that it will be useful, | |
3699 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
3700 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
3701 | +# the GNU General Public License for more details. | |
3702 | +# | |
3703 | +# You should have received a copy of the GNU General Public License | |
3704 | +# along with this program; if not, write to the Free Software | |
3705 | +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3706 | +# | |
3707 | +# | |
3708 | +# EVMS driver configuration | |
3709 | +# | |
3710 | + | |
3711 | +mainmenu_option next_comment | |
3712 | +comment 'Enterprise Volume Management System' | |
3713 | + | |
3714 | +tristate 'EVMS Kernel Runtime' CONFIG_EVMS | |
3715 | +dep_tristate ' EVMS Local Device Manager' CONFIG_EVMS_LOCAL_DEV_MGR $CONFIG_EVMS | |
3716 | +dep_tristate ' EVMS DOS Segment Manager' CONFIG_EVMS_DOS_SEGMENT_MGR $CONFIG_EVMS | |
3717 | +dep_tristate ' EVMS GPT Segment Manager' CONFIG_EVMS_GPT_SEGMENT_MGR $CONFIG_EVMS | |
3718 | +if [ "$CONFIG_ARCH_S390" = "y" ]; then | |
3719 | +dep_tristate ' EVMS S/390 Segment Manager' CONFIG_EVMS_S390_SEGMENT_MGR $CONFIG_EVMS | |
3720 | +fi | |
3721 | +dep_tristate ' EVMS SnapShot Feature' CONFIG_EVMS_SNAPSHOT $CONFIG_EVMS | |
3722 | +dep_tristate ' EVMS DriveLink Feature' CONFIG_EVMS_DRIVELINK $CONFIG_EVMS | |
3723 | +dep_tristate ' EVMS Bad Block Relocation (BBR) Feature' CONFIG_EVMS_BBR $CONFIG_EVMS | |
3724 | +dep_tristate ' EVMS Linux LVM Package' CONFIG_EVMS_LVM $CONFIG_EVMS | |
3725 | +dep_tristate ' EVMS Linux MD Package' CONFIG_EVMS_MD $CONFIG_EVMS | |
3726 | +dep_tristate ' EVMS MD Linear (append) mode' CONFIG_EVMS_MD_LINEAR $CONFIG_EVMS_MD | |
3727 | +dep_tristate ' EVMS MD RAID-0 (stripe) mode' CONFIG_EVMS_MD_RAID0 $CONFIG_EVMS_MD | |
3728 | +dep_tristate ' EVMS MD RAID-1 (mirroring) mode' CONFIG_EVMS_MD_RAID1 $CONFIG_EVMS_MD | |
3729 | +dep_tristate ' EVMS MD RAID-4/RAID-5 mode' CONFIG_EVMS_MD_RAID5 $CONFIG_EVMS_MD | |
3730 | +dep_tristate ' EVMS AIX LVM Package' CONFIG_EVMS_AIX $CONFIG_EVMS | |
3731 | +dep_tristate ' EVMS OS/2 LVM Package' CONFIG_EVMS_OS2 $CONFIG_EVMS | |
3732 | +#dep_tristate ' EVMS Clustering Package' CONFIG_EVMS_ECR $CONFIG_EVMS | |
3733 | + | |
3734 | +if [ "$CONFIG_EVMS" != "n" ]; then | |
3735 | + choice ' EVMS Debug Level' \ | |
3736 | + "Critical CONFIG_EVMS_INFO_CRITICAL \ | |
3737 | + Serious CONFIG_EVMS_INFO_SERIOUS \ | |
3738 | + Error CONFIG_EVMS_INFO_ERROR \ | |
3739 | + Warning CONFIG_EVMS_INFO_WARNING \ | |
3740 | + Default CONFIG_EVMS_INFO_DEFAULT \ | |
3741 | + Details CONFIG_EVMS_INFO_DETAILS \ | |
3742 | + Debug CONFIG_EVMS_INFO_DEBUG \ | |
3743 | + Extra CONFIG_EVMS_INFO_EXTRA \ | |
3744 | + Entry_Exit CONFIG_EVMS_INFO_ENTRY_EXIT \ | |
3745 | + Everything CONFIG_EVMS_INFO_EVERYTHING" Default | |
3746 | +fi | |
3747 | + | |
3748 | +endmenu | |
3749 | + | |
3750 | diff -Naur linux-2002-09-30/drivers/evms/Makefile evms-2002-09-30/drivers/evms/Makefile | |
3751 | --- linux-2002-09-30/drivers/evms/Makefile Wed Dec 31 18:00:00 1969 | |
3752 | +++ evms-2002-09-30/drivers/evms/Makefile Mon Sep 16 15:55:24 2002 | |
3753 | @@ -0,0 +1,64 @@ | |
3754 | +# | |
3755 | +# Makefile for the kernel EVMS driver and modules. | |
3756 | +# | |
3757 | +# 08 March 2001, Mark Peloquin <peloquin@us.ibm.com> | |
3758 | +# | |
3759 | + | |
3760 | +O_TARGET := evmsdrvr.o | |
3761 | + | |
3762 | +export-objs := evms.o evms_passthru.o ldev_mgr.o dos_part.o lvm_vge.o \ | |
3763 | + snapshot.o evms_drivelink.o evms_bbr.o AIXlvm_vge.o \ | |
3764 | + os2lvm_vge.o evms_ecr.o md_core.o md_linear.o md_raid0.o \ | |
3765 | + md_raid1.o md_raid5.o md_xor.o s390_part.o gpt_part.o | |
3766 | + | |
3767 | +# Link order is important! Plugins must come first, then the EVMS core. | |
3768 | + | |
3769 | +obj-$(CONFIG_EVMS_LOCAL_DEV_MGR) += ldev_mgr.o | |
3770 | +obj-$(CONFIG_EVMS_DOS_SEGMENT_MGR) += dos_part.o | |
3771 | +obj-$(CONFIG_EVMS_GPT_SEGMENT_MGR) += gpt_part.o | |
3772 | +obj-$(CONFIG_EVMS_S390_SEGMENT_MGR) += s390_part.o | |
3773 | +obj-$(CONFIG_EVMS_MD) += md_core.o | |
3774 | +obj-$(CONFIG_EVMS_MD_LINEAR) += md_linear.o | |
3775 | +obj-$(CONFIG_EVMS_MD_RAID0) += md_raid0.o | |
3776 | +obj-$(CONFIG_EVMS_MD_RAID1) += md_raid1.o | |
3777 | +obj-$(CONFIG_EVMS_MD_RAID5) += md_raid5.o md_xor.o | |
3778 | +obj-$(CONFIG_EVMS_LVM) += lvm_vge.o | |
3779 | +obj-$(CONFIG_EVMS_AIX) += AIXlvm_vge.o | |
3780 | +obj-$(CONFIG_EVMS_OS2) += os2lvm_vge.o | |
3781 | +obj-$(CONFIG_EVMS_DRIVELINK) += evms_drivelink.o | |
3782 | +obj-$(CONFIG_EVMS_BBR) += evms_bbr.o | |
3783 | +obj-$(CONFIG_EVMS_SNAPSHOT) += snapshot.o | |
3784 | +obj-$(CONFIG_EVMS_ECR) += evms_ecr.o | |
3785 | +obj-$(CONFIG_EVMS) += evms_passthru.o evms.o | |
3786 | + | |
3787 | +EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DEFAULT | |
3788 | +ifeq ($(CONFIG_EVMS_INFO_CRITICAL),y) | |
3789 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_CRITICAL | |
3790 | +endif | |
3791 | +ifeq ($(CONFIG_EVMS_INFO_SERIOUS),y) | |
3792 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_SERIOUS | |
3793 | +endif | |
3794 | +ifeq ($(CONFIG_EVMS_INFO_ERROR),y) | |
3795 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_ERROR | |
3796 | +endif | |
3797 | +ifeq ($(CONFIG_EVMS_INFO_WARNING),y) | |
3798 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_WARNING | |
3799 | +endif | |
3800 | +ifeq ($(CONFIG_EVMS_INFO_DETAILS),y) | |
3801 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DETAILS | |
3802 | +endif | |
3803 | +ifeq ($(CONFIG_EVMS_INFO_DEBUG),y) | |
3804 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DEBUG | |
3805 | +endif | |
3806 | +ifeq ($(CONFIG_EVMS_INFO_EXTRA),y) | |
3807 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_EXTRA | |
3808 | +endif | |
3809 | +ifeq ($(CONFIG_EVMS_INFO_ENTRY_EXIT),y) | |
3810 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_ENTRY_EXIT | |
3811 | +endif | |
3812 | +ifeq ($(CONFIG_EVMS_INFO_EVERYTHING),y) | |
3813 | + EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_EVERYTHING | |
3814 | +endif | |
3815 | + | |
3816 | +include $(TOPDIR)/Rules.make | |
3817 | + | |
3818 | diff -Naur linux-2002-09-30/drivers/evms/dos_part.c evms-2002-09-30/drivers/evms/dos_part.c | |
3819 | --- linux-2002-09-30/drivers/evms/dos_part.c Wed Dec 31 18:00:00 1969 | |
3820 | +++ evms-2002-09-30/drivers/evms/dos_part.c Fri Sep 13 16:09:55 2002 | |
3821 | @@ -0,0 +1,1452 @@ | |
3822 | +/* -*- linux-c -*- */ | |
3823 | +/* | |
3824 | + * | |
3825 | + * | |
3826 | + * Copyright (c) International Business Machines Corp., 2000 | |
3827 | + * | |
3828 | + * This program is free software; you can redistribute it and/or modify | |
3829 | + * it under the terms of the GNU General Public License as published by | |
3830 | + * the Free Software Foundation; either version 2 of the License, or | |
3831 | + * (at your option) any later version. | |
3832 | + * | |
3833 | + * This program is distributed in the hope that it will be useful, | |
3834 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
3835 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
3836 | + * the GNU General Public License for more details. | |
3837 | + * | |
3838 | + * You should have received a copy of the GNU General Public License | |
3839 | + * along with this program; if not, write to the Free Software | |
3840 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3841 | + * | |
3842 | + * | |
3843 | + */ | |
3844 | +/* | |
3845 | + * linux/drivers/evms/dos_part.c | |
3846 | + * | |
3847 | + * EVMS DOS partition manager | |
3848 | + * | |
3849 | + * Partial code extracted from | |
3850 | + * | |
3851 | + * linux/fs/partitions/msdos.c | |
3852 | + * | |
3853 | + */ | |
3854 | + | |
3855 | +#include <linux/config.h> | |
3856 | +#include <linux/module.h> | |
3857 | +#include <linux/kernel.h> | |
3858 | +#include <linux/config.h> | |
3859 | +#include <linux/fs.h> | |
3860 | +#include <linux/genhd.h> | |
3861 | +#include <linux/string.h> | |
3862 | +#include <linux/blk.h> | |
3863 | +#include <linux/init.h> | |
3864 | +#include <linux/iobuf.h> /* for kiobuf stuffs */ | |
3865 | + | |
3866 | +#ifdef CONFIG_BLK_DEV_IDE | |
3867 | +#include <linux/ide.h> /* IDE xlate */ | |
3868 | +#endif /* CONFIG_BLK_DEV_IDE */ | |
3869 | + | |
3870 | +#include <linux/evms/evms.h> | |
3871 | +#include <linux/evms/evms_os2.h> | |
3872 | + | |
3873 | +#include <asm/system.h> | |
3874 | +#include <asm/uaccess.h> | |
3875 | + | |
3876 | +/* prefix used in logging messages */ | |
3877 | +#define LOG_PREFIX "dos_part: " | |
3878 | + | |
3879 | +/* #include "msdos.h" */ | |
3880 | +#define MSDOS_LABEL_MAGIC 0xAA55 | |
3881 | +#define GPT_ENTIRE_DISK_INDICATOR 0xEE | |
3882 | +#define GPT_ESP_INDICATOR 0xEF | |
3883 | + | |
3884 | +/** | |
3885 | + * struct mbr_ebr - Skeletal MBR/EBR structure useful for our purposes | |
3886 | + * @unused1: skip IPL record code | |
3887 | + * @partitions: partition table | |
3888 | + * @signature: DOS magic | |
3889 | + * | |
3890 | + * skeletal access to the partition table in an MBR/EBR | |
3891 | + **/ | |
3892 | +struct mbr_ebr { | |
3893 | + u8 unused1[0x1be]; | |
3894 | + struct partition partitions[4]; | |
3895 | + u16 signature; | |
3896 | +}; | |
3897 | + | |
3898 | +/** | |
3899 | + * struct dos_private - Private data structure for this plugin | |
3900 | + * @source_disk: object this IO will get remapped to | |
3901 | + * @start_sect: source object relative starting address in 512 byte units | |
3902 | + * @nr_sects: partition size in 512 byte units | |
3903 | + * @type: partition type or filesystem format indicator | |
3904 | + * | |
3905 | + * private copy of just the fields we require to remap IO requests | |
3906 | + * to the underlying object. | |
3907 | + **/ | |
3908 | +struct dos_private { | |
3909 | + struct evms_logical_node *source_disk; | |
3910 | + u64 start_sect; | |
3911 | + u64 nr_sects; | |
3912 | + unsigned char type; | |
3913 | +}; | |
3914 | + | |
3915 | +/** | |
3916 | + * struct extended_part - Structure used to track progress traversing an EBR chain | |
3917 | + * @extended: partition table in the extended boot record | |
3918 | + * @start_sect: address of the extended boot record in 512 byte units | |
3919 | + * @next_ebr_start: address of next ebr in the chain | |
3920 | + * @done: progress flag | |
3921 | + * | |
3922 | + * struct used to track extended boot record chain traversals. | |
3923 | + **/ | |
3924 | +struct extended_part { | |
3925 | + struct partition *extended; | |
3926 | + u64 start_sect; | |
3927 | + u64 next_ebr_start; | |
3928 | + int done; | |
3929 | +}; | |
3930 | + | |
3931 | +/* Global variables */ | |
3932 | +static int cur_comp_part_num; /* used to track non-primary | |
3933 | + * partition numbers | |
3934 | + */ | |
3935 | +static int exported_nodes; /* total # of exported segments | |
3936 | + * produced during this discovery. | |
3937 | + */ | |
3938 | + | |
3939 | +/* External references */ | |
3940 | +#if CONFIG_BLK_DEV_MD && CONFIG_AUTODETECT_RAID | |
3941 | +extern void md_autodetect_dev(kdev_t dev); | |
3942 | +#endif | |
3943 | + | |
3944 | +/* Prototypes */ | |
3945 | +static int mbr_ebr_partition_discover(struct evms_logical_node **); | |
3946 | +static int mbr_ebr_partition_delete(struct evms_logical_node *); | |
3947 | +static void mbr_ebr_partition_read(struct evms_logical_node *, | |
3948 | + struct buffer_head *); | |
3949 | +static void mbr_ebr_partition_write(struct evms_logical_node *, | |
3950 | + struct buffer_head *); | |
3951 | +static int mbr_ebr_partition_ioctl(struct evms_logical_node *, struct inode *, | |
3952 | + struct file *, unsigned int, unsigned long); | |
3953 | +static int mbr_ebr_partition_init_io(struct evms_logical_node *, | |
3954 | + int, u64, u64, void *); | |
3955 | + | |
3956 | +static struct evms_plugin_fops fops = { | |
3957 | + .discover = mbr_ebr_partition_discover, | |
3958 | + .delete = mbr_ebr_partition_delete, | |
3959 | + .read = mbr_ebr_partition_read, | |
3960 | + .write = mbr_ebr_partition_write, | |
3961 | + .init_io = mbr_ebr_partition_init_io, | |
3962 | + .ioctl = mbr_ebr_partition_ioctl | |
3963 | +}; | |
3964 | + | |
3965 | +#define EVMS_MSDOS_PARTITION_MANAGER_ID 1 | |
3966 | + | |
3967 | +static struct evms_plugin_header plugin_header = { | |
3968 | + .id = SetPluginID(IBM_OEM_ID, | |
3969 | + EVMS_SEGMENT_MANAGER, | |
3970 | + EVMS_MSDOS_PARTITION_MANAGER_ID), | |
3971 | + .version = { | |
3972 | + .major = 1, | |
3973 | + .minor = 1, | |
3974 | + .patchlevel = 1 | |
3975 | + }, | |
3976 | + .required_services_version = { | |
3977 | + .major = 0, | |
3978 | + .minor = 5, | |
3979 | + .patchlevel = 0 | |
3980 | + }, | |
3981 | + .fops = &fops | |
3982 | +}; | |
3983 | + | |
3984 | +/* | |
3985 | + * Many architectures don't like unaligned accesses, which is | |
3986 | + * frequently the case with the nr_sects and start_sect partition | |
3987 | + * table entries. | |
3988 | + */ | |
3989 | +#include <asm/unaligned.h> | |
3990 | + | |
3991 | +#define SYS_IND(p) (get_unaligned(&p->sys_ind)) | |
3992 | +#define NR_SECTS(p) (u64)({ __typeof__(p->nr_sects) __a = \ | |
3993 | + get_unaligned(&p->nr_sects); \ | |
3994 | + le32_to_cpu(__a); \ | |
3995 | + }) | |
3996 | + | |
3997 | +#define START_SECT(p) (u64)({ __typeof__(p->start_sect) __a = \ | |
3998 | + get_unaligned(&p->start_sect); \ | |
3999 | + le32_to_cpu(__a); \ | |
4000 | + }) | |
4001 | + | |
4002 | +/******************************************/ | |
4003 | +/* List Support - Variables, & Functions */ | |
4004 | +/******************************************/ | |
4005 | + | |
4006 | +/* Typedefs */ | |
4007 | + | |
4008 | +struct segment_list_node { | |
4009 | + struct evms_logical_node *segment; | |
4010 | + struct segment_list_node *next; | |
4011 | +}; | |
4012 | + | |
4013 | +struct disk_list_node { | |
4014 | + struct evms_logical_node *disk; | |
4015 | + struct segment_list_node *segment_list; | |
4016 | + struct disk_list_node *next; | |
4017 | +}; | |
4018 | + | |
4019 | +/* Variables */ | |
4020 | + | |
4021 | +static struct disk_list_node *my_disk_list; | |
4022 | + | |
4023 | +/* Functions */ | |
4024 | + | |
4025 | +static struct disk_list_node ** | |
4026 | +lookup_disk(struct evms_logical_node *disk) | |
4027 | +{ | |
4028 | + struct disk_list_node **link; | |
4029 | + | |
4030 | + /* Returns the slot in my_disk_list that points at the entry for | |
4031 | + * 'disk', or the NULL slot at the end of the list when absent. */ | |
4032 | + for (link = &my_disk_list; *link; link = &(*link)->next) | |
4033 | + if ((*link)->disk == disk) | |
4034 | + break; | |
4035 | + | |
4036 | + return link; | |
4037 | +} | |
4038 | + | |
4039 | +static struct segment_list_node ** | |
4040 | +lookup_segment(struct disk_list_node *disk, struct evms_logical_node *segment) | |
4041 | +{ | |
4042 | + struct segment_list_node **link; | |
4043 | + | |
4044 | + /* Returns the slot in disk->segment_list that points at the entry for | |
4045 | + * 'segment', or the NULL slot at the end of the list when absent. */ | |
4046 | + for (link = &disk->segment_list; *link; link = &(*link)->next) | |
4047 | + if ((*link)->segment == segment) | |
4048 | + break; | |
4049 | + | |
4050 | + return link; | |
4051 | +} | |
4052 | + | |
4053 | +static struct evms_logical_node * | |
4054 | +find_segment_on_disk(struct evms_logical_node *disk, | |
4055 | + u64 start_sect, u64 nr_sects) | |
4056 | +{ | |
4057 | + struct disk_list_node *disk_entry; | |
4058 | + struct segment_list_node *seg_entry; | |
4059 | + struct dos_private *prv; | |
4060 | + | |
4061 | + /* | |
4062 | + * Returns the segment recorded for 'disk' whose private data has | |
4063 | + * exactly this start_sect and nr_sects, or NULL when the disk is | |
4064 | + * not in my_disk_list or has no such segment. | |
4065 | + */ | |
4066 | + disk_entry = *lookup_disk(disk); | |
4067 | + if (!disk_entry) | |
4068 | + return NULL; | |
4069 | + | |
4070 | + for (seg_entry = disk_entry->segment_list; seg_entry; | |
4071 | + seg_entry = seg_entry->next) { | |
4072 | + prv = seg_entry->segment->private; | |
4073 | + if (prv->start_sect == start_sect && | |
4074 | + prv->nr_sects == nr_sects) | |
4075 | + return seg_entry->segment; | |
4076 | + } | |
4077 | + | |
4078 | + return NULL; | |
4079 | +} | |
4080 | + | |
4081 | +/* function description: add_segment_to_disk | |
4082 | + * | |
4083 | + * this function attempts to add a segment to the segment | |
4084 | + * list of a disk. if the specified disk is not found, it | |
4085 | + * is first added to the global disk list. | |
4086 | + * | |
4087 | + * return values: | |
4088 | + * 0 - the segment was appended to the disk's | |
4089 | + * segment list. | |
4090 | + * -1 - the segment was already present in the | |
4091 | + * disk's segment list; the list is unchanged. | |
4092 | + * -ENOMEM - a list node could not be allocated. | |
4093 | + * | |
4094 | + * segments are matched by pointer (see lookup_segment). | |
4095 | + */ | |
4096 | +static int | |
4097 | +add_segment_to_disk(struct evms_logical_node *disk, | |
4098 | + struct evms_logical_node *segment) | |
4099 | +{ | |
4100 | + struct disk_list_node **disk_slot, *disk_entry; | |
4101 | + struct segment_list_node **seg_slot, *seg_entry; | |
4102 | + | |
4103 | + disk_slot = lookup_disk(disk); | |
4104 | + if (!*disk_slot) { | |
4105 | + /* disk not known yet: append an entry for it */ | |
4106 | + disk_entry = kmalloc(sizeof (*disk_entry), GFP_KERNEL); | |
4107 | + if (!disk_entry) | |
4108 | + return -ENOMEM; | |
4109 | + | |
4110 | + memset(disk_entry, 0, sizeof (*disk_entry)); | |
4111 | + disk_entry->disk = disk; | |
4112 | + *disk_slot = disk_entry; | |
4113 | + } | |
4114 | + | |
4115 | + seg_slot = lookup_segment(*disk_slot, segment); | |
4116 | + if (*seg_slot) { | |
4117 | + /* this segment is already recorded for the disk */ | |
4118 | + return -1; | |
4119 | + } | |
4120 | + | |
4121 | + /* append a new entry in the tail slot found by the lookup */ | |
4122 | + seg_entry = kmalloc(sizeof (*seg_entry), GFP_KERNEL); | |
4123 | + if (!seg_entry) { | |
4124 | + /* a disk entry created above stays in the list */ | |
4125 | + return -ENOMEM; | |
4126 | + } | |
4127 | + | |
4128 | + memset(seg_entry, 0, sizeof (*seg_entry)); | |
4129 | + seg_entry->segment = segment; | |
4130 | + *seg_slot = seg_entry; | |
4131 | + | |
4132 | + | |
4133 | + return 0; | |
4134 | +} | |
4135 | + | |
4136 | +static int | |
4137 | +remove_segment_from_disk(struct evms_logical_node *disk, | |
4138 | + struct evms_logical_node *segment, | |
4139 | + struct evms_logical_node **empty_disk) | |
4140 | +{ | |
4141 | + int rc = 0; | |
4142 | + struct disk_list_node **ldln, *tmp_disk_node; | |
4143 | + struct segment_list_node **lsln, *tmp_segment_node; | |
4144 | + | |
4145 | + *empty_disk = NULL; | |
4146 | + ldln = lookup_disk(disk); | |
4147 | + if (*ldln == NULL) { | |
4148 | + rc = -1; | |
4149 | + } else { | |
4150 | + /* disk found in list */ | |
4151 | + /* attempt to find segment */ | |
4152 | + lsln = lookup_segment(*ldln, segment); | |
4153 | + if (*lsln == NULL) { | |
4154 | + rc = -2; | |
4155 | + } else { | |
4156 | + tmp_segment_node = *lsln; | |
4157 | + /* remove segment from list */ | |
4158 | + *lsln = (*lsln)->next; | |
4159 | + /* free the segment list node */ | |
4160 | + kfree(tmp_segment_node); | |
4161 | + | |
4162 | + if ((*ldln)->segment_list == NULL) { | |
4163 | + tmp_disk_node = *ldln; | |
4164 | + *empty_disk = tmp_disk_node->disk; | |
4165 | + /* remove disk from list */ | |
4166 | + *ldln = (*ldln)->next; | |
4167 | + /* free the disk list node */ | |
4168 | + kfree(tmp_disk_node); | |
4169 | + } | |
4170 | + } | |
4171 | + } | |
4172 | + return (rc); | |
4173 | +} | |
4174 | + | |
4175 | +static inline int | |
4176 | +is_extended_partition(struct partition *p) | |
4177 | +{ | |
4178 | + return (SYS_IND(p) == DOS_EXTENDED_PARTITION || | |
4179 | + SYS_IND(p) == WIN98_EXTENDED_PARTITION || | |
4180 | + SYS_IND(p) == LINUX_EXTENDED_PARTITION); | |
4181 | +} | |
4182 | + | |
4183 | +static inline u64 | |
4184 | +part_start(struct partition *part, u64 ext_start, u64 ebr_start) | |
4185 | +{ | |
4186 | + u64 pstart = START_SECT(part); | |
4187 | + pstart += (is_extended_partition(part)) ? ext_start : ebr_start; | |
4188 | + return (pstart); | |
4189 | +} | |
4190 | + | |
4191 | +static int | |
4192 | +validate_mbr_ebr(struct evms_logical_node *node, | |
4193 | + struct mbr_ebr *mbr_ebr, u64 ext_start, | |
4194 | + u64 ebr_start) | |
4195 | +{ | |
4196 | + int valid_mbr_ebr, i, j, mbr_flag; | |
4197 | + struct partition *pi, *pj; | |
4198 | + u64 pi_start, pi_end, pj_start, pj_end; | |
4199 | + | |
4200 | + /* assume an MBR */ | |
4201 | + mbr_flag = TRUE; | |
4202 | + | |
4203 | + /* assume it's valid */ | |
4204 | + valid_mbr_ebr = TRUE; | |
4205 | + | |
4206 | + /* check for valid signature */ | |
4207 | + if (mbr_ebr->signature != cpu_to_le16(MSDOS_LABEL_MAGIC)) { | |
4208 | + LOG_DEBUG("%s: invalid signature on '%s'!\n", | |
4209 | + __FUNCTION__, node->name); | |
4210 | + valid_mbr_ebr = FALSE; | |
4211 | + } | |
4212 | + | |
4213 | + /* check for an AIX IPL signature */ | |
4214 | +#define IPLRECID 0xc9c2d4c1 /* Value is EBCDIC 'IBMA' */ | |
4215 | + if (*(unsigned int *) mbr_ebr == IPLRECID) { | |
4216 | + LOG_DEBUG("%s: found an AIX IPL signature on '%s'\n", | |
4217 | + __FUNCTION__, node->name); | |
4218 | + valid_mbr_ebr = FALSE; | |
4219 | + } | |
4220 | + | |
4221 | + /* check for boot sector fields */ | |
4222 | + | |
4223 | +#if 0 //Remove checking of the first byte | |
4224 | + | |
4225 | + /* attempt to make some initial assumptions about | |
4226 | + * what type of data structure this could be. we | |
4227 | + * start by checking the 1st byte. we can tell a | |
4228 | + * few things based on what is or isn't there. | |
4229 | + */ | |
4230 | + if (valid_mbr_ebr == TRUE) | |
4231 | + switch (*(u_char *) mbr_ebr) { | |
4232 | + /* check for JMP as 1st instruction | |
4233 | + * if found, assume (for now), that | |
4234 | + * this is a boot sector. | |
4235 | + */ | |
4236 | + /* Removed the JMP opcode check because it's not enough to determine | |
4237 | + * that this sector does not have a valid MBR. | |
4238 | + * Note: To avoid going thru validation process of partition table, | |
4239 | + * it's necessary to have a better boot sector check | |
4240 | + * (eg. JMP opcode && other conditions) */ | |
4241 | + /* | |
4242 | + case 0xEB: | |
4243 | + LOG_DEBUG("%s: boot sector detected!\n", __FUNCTION__); | |
4244 | + valid_mbr_ebr = FALSE; | |
4245 | + */ | |
4246 | + /* let this fall thru to pick up the | |
4247 | + * mbr_flag == FALSE. | |
4248 | + */ | |
4249 | + | |
4250 | + /* the MBR should contain boot strap | |
4251 | + * code, so we don't expect the 1st | |
4252 | + * byte to be a 0x0. If the 1st byte | |
4253 | + * IS 0x0, its assumed (for now) to | |
4254 | + * be an EBR. | |
4255 | + */ | |
4256 | + case 0: | |
4257 | + mbr_flag = FALSE; | |
4258 | + break; | |
4259 | + } | |
4260 | +#endif //Remove checking of the first byte | |
4261 | + | |
4262 | + if (valid_mbr_ebr == TRUE) { | |
4263 | + /* dump the partition table entries in debug mode */ | |
4264 | + LOG_DEBUG | |
4265 | + ("%s: disk relative starts: ext_part("PFU64"), ebr("PFU64").\n", | |
4266 | + __FUNCTION__, ext_start, ebr_start); | |
4267 | + for (i = 0; i < 4; i++) { | |
4268 | + pi = &mbr_ebr->partitions[i]; | |
4269 | + LOG_DEBUG | |
4270 | + ("%s: Partition: index(%d), start("PFU64"), size("PFU64"), sys(0x%x).\n", | |
4271 | + __FUNCTION__, i, START_SECT(pi), NR_SECTS(pi), | |
4272 | + SYS_IND(pi)); | |
4273 | + } | |
4274 | + | |
4275 | + /* check for PMBR (Protective Master Boot Record) | |
4276 | + * and skip this node if found | |
4277 | + */ | |
4278 | + for (i = 0; i < 4; i++) { | |
4279 | + pi = &mbr_ebr->partitions[i]; | |
4280 | + | |
4281 | + if (SYS_IND(pi) == 0xEE) { | |
4282 | + valid_mbr_ebr = FALSE; | |
4283 | + LOG_DETAILS | |
4284 | + ("%s: detected PMBR on '%s', skipping.\n", | |
4285 | + __FUNCTION__, node->name); | |
4286 | + break; | |
4287 | + } | |
4288 | + } | |
4289 | + | |
4290 | + /* check if this segment is marked as non-dividable | |
4291 | + * and skip if found | |
4292 | + */ | |
4293 | + if (node->iflags & EVMS_TOP_SEGMENT) { | |
4294 | + valid_mbr_ebr = FALSE; | |
4295 | + } | |
4296 | + } | |
4297 | + | |
4298 | + if (valid_mbr_ebr == TRUE) { | |
4299 | + /* check for mbr/ebr partition table validity */ | |
4300 | + for (i = 0; i < 4; i++) { | |
4301 | + pi = &mbr_ebr->partitions[i]; | |
4302 | + if (NR_SECTS(pi)) { | |
4303 | + /* check for partition extending past end of node */ | |
4304 | + pi_start = part_start(pi, ext_start, ebr_start); | |
4305 | + pi_end = pi_start + NR_SECTS(pi) - 1; | |
4306 | + if (pi_end >= node->total_vsectors) { | |
4307 | + LOG_DEBUG | |
4308 | + ("%s: partition(%d) ends("PFU64") beyond the end of the disk(%s,"PFU64")!\n", | |
4309 | + __FUNCTION__, i, pi_end, | |
4310 | + node->name, node->total_vsectors); | |
4311 | + valid_mbr_ebr = FALSE; | |
4312 | + } | |
4313 | + if (valid_mbr_ebr == FALSE) | |
4314 | + break; | |
4315 | + | |
4316 | + /* check for partition overlap */ | |
4317 | + for (j = i + 1; j < 4; j++) { | |
4318 | + pj = &mbr_ebr->partitions[j]; | |
4319 | + if (NR_SECTS(pj)) { | |
4320 | + pj_start = | |
4321 | + part_start(pj, ext_start, | |
4322 | + ebr_start); | |
4323 | + pj_end = | |
4324 | + pj_start + NR_SECTS(pj) - 1; | |
4325 | + if (pi_start == pj_start) { | |
4326 | + valid_mbr_ebr = FALSE; | |
4327 | + } else if (pi_start < pj_start) { | |
4328 | + if (pi_end >= pj_start) | |
4329 | + valid_mbr_ebr = | |
4330 | + FALSE; | |
4331 | + } else if (pi_start <= pj_end) | |
4332 | + valid_mbr_ebr = FALSE; | |
4333 | + | |
4334 | + if (valid_mbr_ebr == FALSE) { | |
4335 | + LOG_DEBUG | |
4336 | + ("%s: overlapping partitions(%d,%d) detected on '%s'!\n", | |
4337 | + __FUNCTION__, i, j, | |
4338 | + node->name); | |
4339 | + break; | |
4340 | + } | |
4341 | + } | |
4342 | + } | |
4343 | + if (valid_mbr_ebr == FALSE) | |
4344 | + break; | |
4345 | + } | |
4346 | + } | |
4347 | + } | |
4348 | + if (valid_mbr_ebr == TRUE) { | |
4349 | + LOG_DEBUG("%s: valid %cBR detected on '%s'!\n", __FUNCTION__, | |
4350 | + (mbr_flag == TRUE) ? 'M' : 'E', node->name); | |
4351 | + } else { | |
4352 | + LOG_DEBUG("%s: no valid MBR/EBR detected on '%s'!\n", | |
4353 | + __FUNCTION__, node->name); | |
4354 | + } | |
4355 | + return (valid_mbr_ebr); | |
4356 | +} | |
4357 | + | |
4358 | +/* | |
4359 | + * Function: mbr_ebr_process_segment | |
4360 | + */ | |
4361 | +static int | |
4362 | +mbr_ebr_process_segment(struct evms_logical_node **discover_list, | |
4363 | + struct evms_logical_node *node, | |
4364 | + u64 start_sect, | |
4365 | + u64 nr_sects, | |
4366 | + unsigned char type, int part_num, char *partition_name) | |
4367 | +{ | |
4368 | + struct dos_private *dos_prv = NULL; | |
4369 | + struct evms_logical_node *segment; | |
4370 | + int rc = 0; | |
4371 | + | |
4372 | + segment = find_segment_on_disk(node, start_sect, nr_sects); | |
4373 | + if (segment) { | |
4374 | + LOG_DETAILS("exporting segment '%s'.\n", segment->name); | |
4375 | + } else { | |
4376 | + dos_prv = kmalloc(sizeof (*dos_prv), GFP_KERNEL); | |
4377 | + if (dos_prv) { | |
4378 | + memset(dos_prv, 0, sizeof (*dos_prv)); | |
4379 | + dos_prv->source_disk = node; | |
4380 | + dos_prv->start_sect = start_sect; | |
4381 | + dos_prv->nr_sects = nr_sects; | |
4382 | + dos_prv->type = type; | |
4383 | + rc = evms_cs_allocate_logical_node(&segment); | |
4384 | + } else { | |
4385 | + rc = -ENOMEM; | |
4386 | + } | |
4387 | + if (!rc) { | |
4388 | + segment->plugin = &plugin_header; | |
4389 | + segment->system_id = (unsigned int) type; | |
4390 | + segment->total_vsectors = nr_sects; | |
4391 | + segment->block_size = node->block_size; | |
4392 | + segment->hardsector_size = node->hardsector_size; | |
4393 | + segment->private = dos_prv; | |
4394 | + segment->flags = node->flags; | |
4395 | + if (partition_name) | |
4396 | + strcpy(segment->name, partition_name); | |
4397 | + else { | |
4398 | + strcpy(segment->name, node->name); | |
4399 | + if (GetPluginType(node->plugin->id) == | |
4400 | + EVMS_SEGMENT_MANAGER) { | |
4401 | + strcat(segment->name, "."); | |
4402 | + } | |
4403 | + sprintf(segment->name + strlen(segment->name), | |
4404 | + "%d", part_num); | |
4405 | + } | |
4406 | + /* watch for super floppy format gpt system partition | |
4407 | + * and dont let it be sub divided | |
4408 | + */ | |
4409 | + if (segment->system_id == GPT_ESP_INDICATOR) { | |
4410 | + node->iflags |= EVMS_TOP_SEGMENT; | |
4411 | + } | |
4412 | + LOG_DETAILS("creating segment '%s'.\n", segment->name); | |
4413 | + rc = add_segment_to_disk(node, segment); | |
4414 | + if (rc) { | |
4415 | + LOG_ERROR | |
4416 | + ("%s: error(%d) adding segment '%s'!\n", | |
4417 | + __FUNCTION__, rc, segment->name); | |
4418 | + rc = 0; | |
4419 | + } else { | |
4420 | + MOD_INC_USE_COUNT; | |
4421 | + } | |
4422 | + } | |
4423 | + if (rc) { | |
4424 | + if (dos_prv) | |
4425 | + kfree(dos_prv); | |
4426 | + if (segment) | |
4427 | + evms_cs_deallocate_logical_node(segment); | |
4428 | + } | |
4429 | + } | |
4430 | + if (!rc) { | |
4431 | + evms_cs_add_logical_node_to_list(discover_list, segment); | |
4432 | + exported_nodes++; | |
4433 | + } | |
4434 | + return rc; | |
4435 | +} | |
4436 | + | |
4437 | +static void | |
4438 | +print_partition_info(char *leading_comment, struct partition *p) | |
4439 | +{ | |
4440 | + LOG_EXTRA | |
4441 | + ("%s: boot_ind(0x%02x), sys_ind(0x%02x), startCHS(%u,%u,%u), endCHS(%u,%u,%u), startLBA("PFU64"), sizeLBA("PFU64")\n", | |
4442 | + leading_comment, p->boot_ind, p->sys_ind, p->cyl, p->head, | |
4443 | + p->sector, p->end_cyl, p->end_head, p->end_sector, START_SECT(p), | |
4444 | + NR_SECTS(p)); | |
4445 | +} | |
4446 | + | |
4447 | +#ifdef CONFIG_BSD_DISKLABEL | |
4448 | +#define BSD_DISKLABEL_PART_TABLE_SECTOR_OFFSET 1 | |
4449 | +static void | |
4450 | +print_bsd_partition_info(char *leading_comment, struct bsd_partition *p) | |
4451 | +{ | |
4452 | + LOG_EXTRA | |
4453 | + ("%s: p_size(%u), p_offset(%u), p_fsize(%u), p_fstype(0x%02X), p_frag(0x%02X), p_cpg(%u)\n", | |
4454 | + leading_comment, p->p_size, p->p_offset, p->p_fsize, p->p_fstype, | |
4455 | + p->p_frag, p->p_cpg); | |
4456 | +} | |
4457 | + | |
4458 | +/* | |
4459 | + * bsd_disklabel_partition | |
4460 | + * | |
4461 | + * Return: | |
4462 | + * - 0 for 0 partition | |
4463 | + * - (positive) number for number of BSD partitions found | |
4464 | + * - (negative) error code | |
4465 | + */ | |
4466 | +static int | |
4467 | +bsd_disklabel_partition(struct evms_logical_node **discover_list, | |
4468 | + struct evms_logical_node *node, struct partition *bsd) | |
4469 | +{ | |
4470 | + struct bsd_disklabel *l; | |
4471 | + struct bsd_partition *p; | |
4472 | + int max_partitions; | |
4473 | + char *data; | |
4474 | + int rc = 0; | |
4475 | + int count = 0; | |
4476 | + | |
4477 | + data = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4478 | + if (data) | |
4479 | + rc = INIT_IO(node, | |
4480 | + 0, | |
4481 | + START_SECT(bsd) + | |
4482 | + BSD_DISKLABEL_PART_TABLE_SECTOR_OFFSET, 1, data); | |
4483 | + else | |
4484 | + rc = -ENOMEM; | |
4485 | + if (!rc) { | |
4486 | + | |
4487 | + l = (struct bsd_disklabel *) data; | |
4488 | + if (l->d_magic == BSD_DISKMAGIC) { | |
4489 | + | |
4490 | + max_partitions = | |
4491 | + ((SYS_IND(bsd) == | |
4492 | + OPENBSD_PARTITION) ? OPENBSD_MAXPARTITIONS : | |
4493 | + BSD_MAXPARTITIONS); | |
4494 | + if (l->d_npartitions < max_partitions) | |
4495 | + max_partitions = l->d_npartitions; | |
4496 | + for (p = l->d_partitions; | |
4497 | + p - l->d_partitions < max_partitions; p++) { | |
4498 | + if (p->p_fstype != BSD_FS_UNUSED) { | |
4499 | + evmsLOG2(EVMS_INFO_EXTRA, | |
4500 | + (print_bsd_partition_info | |
4501 | + (__FUNCTION__, p))); | |
4502 | + rc = mbr_ebr_process_segment | |
4503 | + (discover_list, node, | |
4504 | + (u64) p->p_offset, | |
4505 | + (u64) p->p_size, p->p_fstype, | |
4506 | + cur_comp_part_num++, NULL); | |
4507 | + if (rc) | |
4508 | + break; | |
4509 | + count++; | |
4510 | + } | |
4511 | + } | |
4512 | + } | |
4513 | + } | |
4514 | + if (data) | |
4515 | + kfree(data); | |
4516 | + if (!rc) | |
4517 | + rc = count; | |
4518 | + LOG_DETAILS("%s: exported (%d) partitions\n", __FUNCTION__, rc); | |
4519 | + return rc; | |
4520 | +} | |
4521 | +#endif | |
4522 | + | |
4523 | +#ifdef CONFIG_UNIXWARE_DISKLABEL | |
4524 | +#define UNIXWARE_PART_TABLE_SECTOR_OFFSET 29 | |
4525 | + | |
4526 | +/* | |
4527 | + * unixware_partition | |
4528 | + * | |
4529 | + * Return: | |
4530 | + * - 0 for 0 partition | |
4531 | + * - (positive) number for number of UNIXWARE partitions found | |
4532 | + * - (negative) error code | |
4533 | + */ | |
4534 | +static int | |
4535 | +unixware_partition(struct evms_logical_node **discover_list, | |
4536 | + struct evms_logical_node *node, | |
4537 | + struct partition *unixware_part) | |
4538 | +{ | |
4539 | + struct unixware_disklabel *l; | |
4540 | + struct unixware_slice *p; | |
4541 | + char *data = NULL; | |
4542 | + int rc = 0; | |
4543 | + int count = 0; | |
4544 | + | |
4545 | + data = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4546 | + if (data) | |
4547 | + rc = INIT_IO(node, | |
4548 | + 0, | |
4549 | + START_SECT(unixware_part) + | |
4550 | + UNIXWARE_PART_TABLE_SECTOR_OFFSET, 1, data); | |
4551 | + else | |
4552 | + rc = -ENOMEM; | |
4553 | + if (!rc) { | |
4554 | + l = (struct unixware_disklabel *) data; | |
4555 | + if (le32_to_cpu(l->d_magic) == UNIXWARE_DISKMAGIC && | |
4556 | + le32_to_cpu(l->vtoc.v_magic) == UNIXWARE_DISKMAGIC2) { | |
4557 | + p = &l->vtoc.v_slice[1]; /* The 0th slice is the same as whole disk. */ | |
4558 | + while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { | |
4559 | + if (p->s_label != UNIXWARE_FS_UNUSED) { | |
4560 | + rc = mbr_ebr_process_segment | |
4561 | + (discover_list, node, START_SECT(p), | |
4562 | + NR_SECTS(p), UNIXWARE_PARTITION, | |
4563 | + cur_comp_part_num++, NULL); | |
4564 | + if (rc) | |
4565 | + break; | |
4566 | + count++; | |
4567 | + } | |
4568 | + p++; | |
4569 | + } | |
4570 | + } | |
4571 | + } | |
4572 | + if (data) | |
4573 | + kfree(data); | |
4574 | + if (!rc) | |
4575 | + rc = count; | |
4576 | + LOG_DETAILS("%s: exported (%d) partitions\n", __FUNCTION__, rc); | |
4577 | + return rc; | |
4578 | +} | |
4579 | +#endif | |
4580 | + | |
4581 | +#ifdef CONFIG_SOLARIS_X86_PARTITION | |
4582 | +#define SOLARIS_X86_PART_TABLE_SECTOR_OFFSET 1 | |
4583 | +/* | |
4584 | + * solaris_x86_partition | |
4585 | + * | |
4586 | + * Return: | |
4587 | + * - 0 for 0 partition | |
4588 | + * - (positive) number for number of solaris partitions found | |
4589 | + * - (negative) error code | |
4590 | + */ | |
4591 | +static int | |
4592 | +solaris_x86_partition(struct evms_logical_node **discover_list, | |
4593 | + struct evms_logical_node *node, | |
4594 | + struct partition *solaris_x86, int probe_only) | |
4595 | +{ /* if TRUE, do not add segments */ | |
4596 | + long offset = START_SECT(solaris_x86); | |
4597 | + struct solaris_x86_vtoc *v; | |
4598 | + struct solaris_x86_slice *s; | |
4599 | + int i; | |
4600 | + char *data = NULL; | |
4601 | + int rc = 0; | |
4602 | + int count = 0; | |
4603 | + | |
4604 | + data = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4605 | + if (data) | |
4606 | + rc = INIT_IO(node, | |
4607 | + 0, | |
4608 | + START_SECT(solaris_x86) + | |
4609 | + SOLARIS_X86_PART_TABLE_SECTOR_OFFSET, 1, data); | |
4610 | + else | |
4611 | + rc = -ENOMEM; | |
4612 | + if (!rc) { | |
4613 | + | |
4614 | + v = (struct solaris_x86_vtoc *) data; | |
4615 | + | |
4616 | + if (v->v_sanity == SOLARIS_X86_VTOC_SANE) { | |
4617 | + if (v->v_version != 1) { | |
4618 | + LOG_WARNING | |
4619 | + ("%s: cannot handle version %d vtoc>\n", | |
4620 | + __FUNCTION__, v->v_version); | |
4621 | + } else { | |
4622 | + for (i = 0; i < v->v_nparts; i++) { | |
4623 | + s = &v->v_slice[i]; | |
4624 | + LOG_EXTRA | |
4625 | + ("s[%d] s_tag(%u), s_flag(%u), s_start(%u), s_size(%u), last_sector(%u)\n", | |
4626 | + i, s->s_tag, s->s_flag, s->s_start, | |
4627 | + s->s_size, | |
4628 | + s->s_start + s->s_size - 1); | |
4629 | + | |
4630 | + if ((s->s_size == 0) | |
4631 | + || (s->s_tag == 0x05)) | |
4632 | + continue; | |
4633 | + if (!probe_only) { | |
4634 | + rc = mbr_ebr_process_segment | |
4635 | + (discover_list, node, | |
4636 | + (u64) (s->s_start + | |
4637 | + offset), | |
4638 | + (u64) s->s_size, | |
4639 | + SOLARIS_X86_PARTITION, | |
4640 | + cur_comp_part_num++, NULL); | |
4641 | + if (rc) | |
4642 | + break; | |
4643 | + } | |
4644 | + count++; | |
4645 | + } | |
4646 | + } | |
4647 | + } | |
4648 | + } | |
4649 | + if (data) | |
4650 | + kfree(data); | |
4651 | + if (!rc) | |
4652 | + rc = count; | |
4653 | + LOG_DETAILS("%s: %s (%d) partitions\n", | |
4654 | + __FUNCTION__, probe_only ? " " : "exported", rc); | |
4655 | + return rc; | |
4656 | +} | |
4657 | +#endif | |
4658 | + | |
4659 | +/* | |
4660 | + * os2lvm_partition() looks for DLAT at last sector of the track containing MBR/EBR | |
4661 | + * | |
4662 | + * Returns: 1 - os2 DLAT was found | |
4663 | + * 0 otherwise | |
4664 | + * | |
4665 | + */ | |
4666 | +static int | |
4667 | +os2lvm_partition(u64 MBR_EBR_sect, | |
4668 | + struct evms_logical_node *node, struct dla_table_sector *dlat) | |
4669 | +{ | |
4670 | + struct hd_geometry geometry; | |
4671 | + int rc; | |
4672 | + u32 crc_hold; | |
4673 | + | |
4674 | + rc = evms_cs_kernel_ioctl(node, HDIO_GETGEO, (unsigned long) &geometry); | |
4675 | + if (rc) { | |
4676 | + LOG_SERIOUS("%s: ioctl failed(%u) on '%s'\n", | |
4677 | + __FUNCTION__, rc, node->name); | |
4678 | + } else | |
4679 | + if (!INIT_IO(node, 0, MBR_EBR_sect + geometry.sectors - 1, 1, dlat)) | |
4680 | + { | |
4681 | + if ((dlat->DLA_Signature1 == cpu_to_le32(DLA_TABLE_SIGNATURE1)) | |
4682 | + && (dlat->DLA_Signature2 == | |
4683 | + cpu_to_le32(DLA_TABLE_SIGNATURE2))) { | |
4684 | + crc_hold = le32_to_cpu(dlat->DLA_CRC); | |
4685 | + dlat->DLA_CRC = 0; | |
4686 | + if (evms_cs_calculate_crc | |
4687 | + (EVMS_INITIAL_CRC, (void *) dlat, | |
4688 | + node->hardsector_size) == crc_hold) | |
4689 | + return 1; | |
4690 | + } | |
4691 | + } | |
4692 | + return 0; | |
4693 | +} | |
4694 | + | |
4695 | +static int | |
4696 | +mbr_ebr_process_logical_drive(struct evms_logical_node **discover_list, | |
4697 | + struct evms_logical_node *node, | |
4698 | + struct extended_part *ext_info, | |
4699 | + int i, | |
4700 | + struct partition *p, | |
4701 | + int os2lvm, struct dla_table_sector *dlat) | |
4702 | +{ | |
4703 | + int rc = 0; | |
4704 | + char tmp_buf[EVMS_VOLUME_NAME_SIZE], *partition_name; | |
4705 | + | |
4706 | + LOG_EXTRA("%s: PartitionTableIndex(%i), Start("PFU64"), Size("PFU64")\n", | |
4707 | + __FUNCTION__, i, START_SECT(p), NR_SECTS(p)); | |
4708 | + | |
4709 | + if (NR_SECTS(p)) { | |
4710 | + if (is_extended_partition(p)) { | |
4711 | + ext_info->next_ebr_start = | |
4712 | + (u64) (START_SECT(p) + | |
4713 | + START_SECT(ext_info->extended)); | |
4714 | + ext_info->done = FALSE; /* not done yet */ | |
4715 | + } else { | |
4716 | + partition_name = NULL; | |
4717 | + if (os2lvm && p->sys_ind != LVM_PARTITION_INDICATOR && | |
4718 | + le32_to_cpu(dlat->DLA_Array[i].Partition_Start) == | |
4719 | + (ext_info->start_sect + START_SECT(p)) | |
4720 | + && le32_to_cpu(dlat->DLA_Array[i].Partition_Size) == | |
4721 | + NR_SECTS(p) | |
4722 | + && dlat->DLA_Array[i].Drive_Letter != '\0') { | |
4723 | + sprintf(tmp_buf, "os2/%c", | |
4724 | + dlat->DLA_Array[i].Drive_Letter); | |
4725 | + partition_name = tmp_buf; | |
4726 | + } | |
4727 | + evmsLOG2(EVMS_INFO_EXTRA, | |
4728 | + (print_partition_info(__FUNCTION__, p))); | |
4729 | + | |
4730 | + rc = mbr_ebr_process_segment(discover_list, | |
4731 | + node, | |
4732 | + ext_info->start_sect + | |
4733 | + START_SECT(p), NR_SECTS(p), | |
4734 | + p->sys_ind, | |
4735 | + cur_comp_part_num++, | |
4736 | + partition_name); | |
4737 | + } | |
4738 | + } | |
4739 | + return (rc); | |
4740 | +} | |
4741 | + | |
4742 | +static int | |
4743 | +mbr_ebr_process_ebr(struct evms_logical_node **discover_list, | |
4744 | + struct evms_logical_node *node, | |
4745 | + struct extended_part *ext_info, struct mbr_ebr *ebr) | |
4746 | +{ | |
4747 | + int rc = 0, i, os2lvm; | |
4748 | + struct partition *p; | |
4749 | + struct dla_table_sector *dlat = NULL; | |
4750 | + | |
4751 | + /* allocate space for the OS2 DLAT info */ | |
4752 | + dlat = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4753 | + if (dlat) { | |
4754 | + /* read the dlat for this ebr */ | |
4755 | + os2lvm = os2lvm_partition(ext_info->start_sect, node, dlat); | |
4756 | + | |
4757 | + /* walk thru the partition table in the ebr | |
4758 | + * processing each partition record. | |
4759 | + */ | |
4760 | + for (i = 0; i < 4; i++) { | |
4761 | + p = &ebr->partitions[i]; | |
4762 | + rc = mbr_ebr_process_logical_drive(discover_list, | |
4763 | + node, | |
4764 | + ext_info, | |
4765 | + i, p, os2lvm, dlat); | |
4766 | + } | |
4767 | + } else { | |
4768 | + rc = -ENOMEM; | |
4769 | + } | |
4770 | + | |
4771 | + /* free the space used for OS2 DLAT info */ | |
4772 | + if (dlat) | |
4773 | + kfree(dlat); | |
4774 | + | |
4775 | + return (rc); | |
4776 | +} | |
4777 | + | |
4778 | +static int | |
4779 | +mbr_ebr_probe_for_ebr(struct evms_logical_node **discover_list, | |
4780 | + struct evms_logical_node *node, | |
4781 | + struct extended_part *ext_info) | |
4782 | +{ | |
4783 | + int rc = 0; | |
4784 | + u_char *sector_buffer = NULL; | |
4785 | + struct mbr_ebr *ebr = NULL; | |
4786 | + | |
4787 | + /* allocate a sector size buffer */ | |
4788 | + sector_buffer = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4789 | + if (sector_buffer) | |
4790 | + /* read the ebr sector */ | |
4791 | + rc = INIT_IO(node, 0, ext_info->start_sect, 1, sector_buffer); | |
4792 | + else | |
4793 | + rc = -ENOMEM; | |
4794 | + | |
4795 | + if (!rc) { | |
4796 | + ebr = (struct mbr_ebr *) sector_buffer; | |
4797 | + if (validate_mbr_ebr(node, ebr, | |
4798 | + START_SECT(ext_info->extended), | |
4799 | + ext_info->start_sect) == TRUE) | |
4800 | + rc = mbr_ebr_process_ebr(discover_list, | |
4801 | + node, ext_info, ebr); | |
4802 | + } | |
4803 | + | |
4804 | + if (sector_buffer) | |
4805 | + kfree(sector_buffer); | |
4806 | + | |
4807 | + return (rc); | |
4808 | +} | |
4809 | + | |
4810 | +static int | |
4811 | +mbr_ebr_process_extended_partition(struct evms_logical_node **discover_list, | |
4812 | + struct evms_logical_node *node, | |
4813 | + struct partition *p) | |
4814 | +{ | |
4815 | + int rc = 0; | |
4816 | + struct extended_part ext_info; | |
4817 | + | |
4818 | + memset(&ext_info, 0, sizeof (ext_info)); | |
4819 | + ext_info.done = FALSE; | |
4820 | + ext_info.extended = p; | |
4821 | + ext_info.next_ebr_start = START_SECT(p); | |
4822 | + while (ext_info.done == FALSE) { | |
4823 | + ext_info.done = TRUE; /* assume done, unless we find another EBR */ | |
4824 | + ext_info.start_sect = ext_info.next_ebr_start; | |
4825 | + rc = mbr_ebr_probe_for_ebr(discover_list, node, &ext_info); | |
4826 | + } | |
4827 | + return rc; | |
4828 | +} | |
4829 | + | |
4830 | +/* | |
4831 | + * is_non_dos_extended | |
4832 | + * | |
4833 | + * This function returns TRUE if the partition entry represents a non-DOS | |
4834 | + * extended partition such as UnixWare, Solaris x86 and BSD | |
4835 | + */ | |
4836 | +static int | |
4837 | +is_non_dos_extended(struct evms_logical_node **discover_list, | |
4838 | + struct evms_logical_node *node, struct partition *p) | |
4839 | +{ | |
4840 | + if (NR_SECTS(p)) { | |
4841 | +#ifdef CONFIG_BSD_DISKLABEL | |
4842 | + if (SYS_IND(p) == BSD_PARTITION || | |
4843 | + SYS_IND(p) == NETBSD_PARTITION || | |
4844 | + SYS_IND(p) == OPENBSD_PARTITION) | |
4845 | + return TRUE; | |
4846 | +#endif | |
4847 | + | |
4848 | +#ifdef CONFIG_UNIXWARE_DISKLABEL | |
4849 | + if (SYS_IND(p) == UNIXWARE_PARTITION) | |
4850 | + return TRUE; | |
4851 | +#endif | |
4852 | + | |
4853 | +#ifdef CONFIG_SOLARIS_X86_PARTITION | |
4854 | + if ((SYS_IND(p) == SOLARIS_X86_PARTITION) && | |
4855 | + (solaris_x86_partition(discover_list, node, p, TRUE) > 0)) | |
4856 | + return TRUE; | |
4857 | +#endif | |
4858 | + } | |
4859 | + return (FALSE); | |
4860 | +} | |
4861 | + | |
4862 | +/* | |
4863 | + * mbr_ebr_process_other_primary_partition | |
4864 | + * This function processes other (non-DOS) primary partitions such as | |
4865 | + * UnixWare, Solaris x86 and BSD | |
4866 | + */ | |
4867 | +static int | |
4868 | +mbr_ebr_process_other_primary_partition(struct evms_logical_node | |
4869 | + **discover_list, | |
4870 | + struct evms_logical_node *node, | |
4871 | + struct partition *p) | |
4872 | +{ | |
4873 | + if (NR_SECTS(p)) { | |
4874 | +#ifdef CONFIG_BSD_DISKLABEL | |
4875 | + if (SYS_IND(p) == BSD_PARTITION || | |
4876 | + SYS_IND(p) == NETBSD_PARTITION || | |
4877 | + SYS_IND(p) == OPENBSD_PARTITION) | |
4878 | + return bsd_disklabel_partition(discover_list, node, p); | |
4879 | +#endif | |
4880 | + | |
4881 | +#ifdef CONFIG_UNIXWARE_DISKLABEL | |
4882 | + if (SYS_IND(p) == UNIXWARE_PARTITION) | |
4883 | + return unixware_partition(discover_list, node, p); | |
4884 | +#endif | |
4885 | + | |
4886 | +#ifdef CONFIG_SOLARIS_X86_PARTITION | |
4887 | + if (SYS_IND(p) == SOLARIS_X86_PARTITION) | |
4888 | + return solaris_x86_partition(discover_list, node, p, | |
4889 | + FALSE); | |
4890 | +#endif | |
4891 | + } | |
4892 | + return (0); | |
4893 | +} | |
4894 | + | |
4895 | +static int | |
4896 | +mbr_ebr_process_dos_primary_partition(struct evms_logical_node **discover_list, | |
4897 | + struct evms_logical_node *node, | |
4898 | + int i, | |
4899 | + struct partition *p, | |
4900 | + int os2lvm, struct dla_table_sector *dlat) | |
4901 | +{ | |
4902 | + int rc = 0; | |
4903 | + char tmp_buf[EVMS_VOLUME_NAME_SIZE], *partition_name; | |
4904 | + | |
4905 | + LOG_EVERYTHING("%s: PartitionTableIndex(%i), Start("PFU64"), Size("PFU64")\n", | |
4906 | + __FUNCTION__, i, START_SECT(p), NR_SECTS(p)); | |
4907 | + | |
4908 | + if (NR_SECTS(p)) { | |
4909 | + | |
4910 | + if (is_extended_partition(p)) | |
4911 | + rc = mbr_ebr_process_extended_partition(discover_list, | |
4912 | + node, p); | |
4913 | + | |
4914 | + else { | |
4915 | + partition_name = NULL; | |
4916 | + if (os2lvm && p->sys_ind != LVM_PARTITION_INDICATOR && | |
4917 | + le32_to_cpu(dlat->DLA_Array[i].Partition_Start) == | |
4918 | + START_SECT(p) | |
4919 | + && le32_to_cpu(dlat->DLA_Array[i].Partition_Size) == | |
4920 | + NR_SECTS(p) | |
4921 | + && dlat->DLA_Array[i].Drive_Letter != '\0') { | |
4922 | + sprintf(tmp_buf, "os2/%c", | |
4923 | + dlat->DLA_Array[i].Drive_Letter); | |
4924 | + partition_name = tmp_buf; | |
4925 | + } | |
4926 | + evmsLOG2(EVMS_INFO_EXTRA, | |
4927 | + (print_partition_info(__FUNCTION__, p))); | |
4928 | + | |
4929 | + rc = mbr_ebr_process_segment(discover_list, | |
4930 | + node, | |
4931 | + START_SECT(p), | |
4932 | + NR_SECTS(p), | |
4933 | + p->sys_ind, | |
4934 | + i + 1, partition_name); | |
4935 | + } | |
4936 | + } | |
4937 | + return (rc); | |
4938 | +} | |
4939 | + | |
4940 | +static int | |
4941 | +mbr_ebr_process_mbr(struct evms_logical_node **discover_list, | |
4942 | + struct evms_logical_node *node, struct mbr_ebr *mbr) | |
4943 | +{ | |
4944 | + int rc = 0, i, os2lvm; | |
4945 | + struct partition *p; | |
4946 | + struct dla_table_sector *dlat = NULL; | |
4947 | + | |
4948 | + cur_comp_part_num = 5; /* set this value for each disk */ | |
4949 | + | |
4950 | + /* allocate space for the OS2 DLAT info */ | |
4951 | + dlat = kmalloc(node->hardsector_size, GFP_KERNEL); | |
4952 | + if (dlat) { | |
4953 | + /* read the dlat for this mbr */ | |
4954 | + os2lvm = os2lvm_partition(0, node, dlat); | |
4955 | + | |
4956 | + /* Pass 1: walk thru the partition table in the mbr | |
4957 | + * processing each partition record. | |
4958 | + */ | |
4959 | + for (i = 0; i < 4; i++) { | |
4960 | + p = &mbr->partitions[i]; | |
4961 | + if (is_non_dos_extended(discover_list, node, p)) { | |
4962 | + LOG_DETAILS | |
4963 | + (" Found and skip a non-dos extended partition.\n"); | |
4964 | + continue; | |
4965 | + } | |
4966 | + | |
4967 | + mbr_ebr_process_dos_primary_partition(discover_list, | |
4968 | + node, | |
4969 | + i, | |
4970 | + p, os2lvm, dlat); | |
4971 | + } | |
4972 | + | |
4973 | + /* Pass 2: walk thru the partition table in the mbr | |
4974 | + * processing each partition record for non-DOS extended partitions | |
4975 | + */ | |
4976 | + for (i = 0; i < 4; i++) { | |
4977 | + p = &mbr->partitions[i]; | |
4978 | + mbr_ebr_process_other_primary_partition(discover_list, | |
4979 | + node, p); | |
4980 | + } | |
4981 | + | |
4982 | + } else { | |
4983 | + rc = -ENOMEM; | |
4984 | + } | |
4985 | + | |
4986 | + /* free the space used for OS2 DLAT info */ | |
4987 | + if (dlat) | |
4988 | + kfree(dlat); | |
4989 | + | |
4990 | + return (rc); | |
4991 | +} | |
4992 | + | |
4993 | +static int | |
4994 | +mbr_ebr_probe_for_mbr(struct evms_logical_node **discover_list, | |
4995 | + struct evms_logical_node *node) | |
4996 | +{ | |
4997 | + int rc = 0; | |
4998 | + u_char *sector_buffer = NULL; | |
4999 | + struct mbr_ebr *mbr = NULL; | |
5000 | + | |
5001 | + LOG_DEBUG("%s: probing (%s).\n", __FUNCTION__, node->name); | |
5002 | + | |
5003 | + /* allocate a sector size buffer */ | |
5004 | + sector_buffer = kmalloc(node->hardsector_size, GFP_KERNEL); | |
5005 | + if (sector_buffer) | |
5006 | + /* read the location of the mbr sector */ | |
5007 | + rc = INIT_IO(node, 0, 0, 1, sector_buffer); | |
5008 | + else | |
5009 | + rc = -ENOMEM; | |
5010 | + if (rc) { | |
5011 | + LOG_ERROR("%s: read error(%d) on '%s'.\n", | |
5012 | + __FUNCTION__, rc, node->name); | |
5013 | + } else { | |
5014 | + mbr = (struct mbr_ebr *) sector_buffer; | |
5015 | + if (validate_mbr_ebr(node, mbr, 0, 0) == TRUE) { | |
5016 | + /* since it looks like this disk has a | |
5017 | + * valid MBR, remove the disk node from | |
5018 | + * the discover list. it may already be | |
5019 | + * on the global list, or it will be | |
5020 | + * added to it. in the case of an mbr | |
5021 | + * with no partitions, it is simply | |
5022 | + * removed and forgotten. when one or | |
5023 | + * more partitions are created, the | |
5024 | + * disk will be examined and handled | |
5025 | + * properly during the following | |
5026 | + * rediscover operation. | |
5027 | + */ | |
5028 | + evms_cs_remove_logical_node_from_list(discover_list, | |
5029 | + node); | |
5030 | + | |
5031 | + rc = mbr_ebr_process_mbr(discover_list, node, mbr); | |
5032 | + } | |
5033 | + } | |
5034 | + | |
5035 | + if (sector_buffer) | |
5036 | + kfree(sector_buffer); | |
5037 | + | |
5038 | + return (rc); | |
5039 | +} | |
5040 | + | |
5041 | +/* | |
5042 | + * Function: mbr_ebr_partition_discover | |
5043 | + * | |
5044 | + */ | |
5045 | +static int | |
5046 | +mbr_ebr_partition_discover(struct evms_logical_node **discover_list) | |
5047 | +{ | |
5048 | + int rc = 0; | |
5049 | + struct evms_logical_node *node, *next_node; | |
5050 | + | |
5051 | + MOD_INC_USE_COUNT; | |
5052 | + LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__); | |
5053 | + | |
5054 | + /* initialize global variable */ | |
5055 | + exported_nodes = 0; | |
5056 | + | |
5057 | + /* examine each node on the discover list */ | |
5058 | + next_node = *discover_list; | |
5059 | + while (next_node) { | |
5060 | + node = next_node; | |
5061 | + next_node = node->next; | |
5062 | + if (node->plugin->id == plugin_header.id) | |
5063 | + /* don't recurse into our own objects | |
5064 | + */ | |
5065 | + continue; | |
5066 | + mbr_ebr_probe_for_mbr(discover_list, node); | |
5067 | + } | |
5068 | + | |
5069 | + LOG_ENTRY_EXIT("%s: EXIT(exported nodes:%d, error code:%d)\n", | |
5070 | + __FUNCTION__, exported_nodes, rc); | |
5071 | + if (exported_nodes) | |
5072 | + rc = exported_nodes; | |
5073 | + MOD_DEC_USE_COUNT; | |
5074 | + return (rc); | |
5075 | +} | |
5076 | + | |
5077 | +/* | |
5078 | + * Function: mbr_ebr_partition_delete | |
5079 | + * | |
5080 | + */ | |
5081 | +static int | |
5082 | +mbr_ebr_partition_delete(struct evms_logical_node *segment) | |
5083 | +{ | |
5084 | + int rc = 0; | |
5085 | + struct dos_private *dos_prv; | |
5086 | + struct evms_logical_node *empty_disk = NULL; | |
5087 | + | |
5088 | + LOG_DETAILS("deleting segment '%s'.\n", segment->name); | |
5089 | + | |
5090 | + if (!segment) { | |
5091 | + rc = -ENODEV; | |
5092 | + } else { | |
5093 | + dos_prv = segment->private; | |
5094 | + if (dos_prv) { | |
5095 | + /* remove the segment from the | |
5096 | + * disk's segment list | |
5097 | + */ | |
5098 | + rc = remove_segment_from_disk(dos_prv->source_disk, | |
5099 | + segment, &empty_disk); | |
5100 | + /* free the local instance data */ | |
5101 | + kfree(dos_prv); | |
5102 | + } | |
5103 | + /* free the segment node */ | |
5104 | + evms_cs_deallocate_logical_node(segment); | |
5105 | + MOD_DEC_USE_COUNT; | |
5106 | + /* if the last segment on the disk was | |
5107 | + * deleted, delete the disk node too | |
5108 | + */ | |
5109 | + if (empty_disk) | |
5110 | + DELETE(empty_disk); | |
5111 | + } | |
5112 | + return (rc); | |
5113 | +} | |
5114 | + | |
5115 | +/* | |
5116 | + * function: mbr_ebr_partition_io_error | |
5117 | + * | |
5118 | + * this function was primarily created because the function | |
5119 | + * buffer_IO_error is inline and kgdb doesn't allow breakpoints | |
5120 | + * to be set on inline functions. Since this was an error path | |
5121 | + * and not mainline, I decided to add a trace statement to help | |
5122 | + * report on the failing condition. | |
5123 | + * | |
5124 | + */ | |
5125 | +static void | |
5126 | +mbr_ebr_partition_io_error(struct evms_logical_node *node, | |
5127 | + int io_flag, struct buffer_head *bh) | |
5128 | +{ | |
5129 | + LOG_SERIOUS | |
5130 | + ("attempt to %s beyond partition boundary("PFU64") on (%s), rsector("PFU64").\n", | |
5131 | + (io_flag) ? "WRITE" : "READ", node->total_vsectors - 1, node->name, | |
5132 | + (u64) bh->b_rsector); | |
5133 | + | |
5134 | + bh->b_end_io(bh, 0); | |
5135 | +} | |
5136 | + | |
5137 | +/* | |
5138 | + * Function: mbr_ebr_partition_read | |
5139 | + * | |
5140 | + */ | |
5141 | +static void | |
5142 | +mbr_ebr_partition_read(struct evms_logical_node *partition, | |
5143 | + struct buffer_head *bh) | |
5144 | +{ | |
5145 | + struct dos_private *dos_prv = partition->private; | |
5146 | + | |
5147 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <= | |
5148 | + partition->total_vsectors) { | |
5149 | + bh->b_rsector += dos_prv->start_sect; | |
5150 | + R_IO(dos_prv->source_disk, bh); | |
5151 | + } else | |
5152 | + mbr_ebr_partition_io_error(partition, READ, bh); | |
5153 | +} | |
5154 | + | |
5155 | +/* | |
5156 | + * Function: mbr_ebr_partition_write | |
5157 | + * | |
5158 | + */ | |
5159 | +static void | |
5160 | +mbr_ebr_partition_write(struct evms_logical_node *partition, | |
5161 | + struct buffer_head *bh) | |
5162 | +{ | |
5163 | + struct dos_private *dos_prv = partition->private; | |
5164 | + | |
5165 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <= | |
5166 | + partition->total_vsectors) { | |
5167 | + bh->b_rsector += dos_prv->start_sect; | |
5168 | + W_IO(dos_prv->source_disk, bh); | |
5169 | + } else | |
5170 | + mbr_ebr_partition_io_error(partition, WRITE, bh); | |
5171 | +} | |
5172 | + | |
5173 | +/* | |
5174 | + * Function: mbr_ebr_partition_init_io | |
5175 | + * | |
5176 | + */ | |
5177 | +static int | |
5178 | +mbr_ebr_partition_init_io(struct evms_logical_node *partition, int io_flag, /* 0=read, 1=write */ | |
5179 | + u64 sect_nr, /* disk LBA */ | |
5180 | + u64 num_sects, /* # of sectors */ | |
5181 | + void *buf_addr) | |
5182 | +{ /* buffer address */ | |
5183 | + int rc; | |
5184 | + struct dos_private *dos_prv = partition->private; | |
5185 | + | |
5186 | + if ((sect_nr + num_sects) <= partition->total_vsectors) { | |
5187 | + rc = INIT_IO(dos_prv->source_disk, io_flag, | |
5188 | + sect_nr + dos_prv->start_sect, num_sects, | |
5189 | + buf_addr); | |
5190 | + } else { | |
5191 | + LOG_SERIOUS | |
5192 | + ("init_io: attempt to %s beyond partition(%s) boundary("PFU64") at sector("PFU64") for count("PFU64").\n", | |
5193 | + (io_flag) ? "WRITE" : "READ", partition->name, | |
5194 | + (dos_prv->nr_sects - 1), sect_nr, num_sects); | |
5195 | + rc = -EINVAL; | |
5196 | + } | |
5197 | + | |
5198 | + return (rc); | |
5199 | +} | |
5200 | + | |
5201 | +/* | |
5202 | + * Function: mbr_ebr_partition_ioctl | |
5203 | + * | |
5204 | + */ | |
5205 | +static int | |
5206 | +mbr_ebr_partition_ioctl(struct evms_logical_node *partition, | |
5207 | + struct inode *inode, | |
5208 | + struct file *file, unsigned int cmd, unsigned long arg) | |
5209 | +{ | |
5210 | + struct dos_private *dos_prv; | |
5211 | + struct hd_geometry hd_geo; | |
5212 | + int rc; | |
5213 | + | |
5214 | + rc = 0; | |
5215 | + dos_prv = partition->private; | |
5216 | + if (!inode) | |
5217 | + return -EINVAL; | |
5218 | + switch (cmd) { | |
5219 | + case HDIO_GETGEO: | |
5220 | + { | |
5221 | + rc = IOCTL(dos_prv->source_disk, inode, file, cmd, arg); | |
5222 | + if (rc) | |
5223 | + break; | |
5224 | + if (copy_from_user | |
5225 | + (&hd_geo, (void *) arg, | |
5226 | + sizeof (struct hd_geometry))) | |
5227 | + rc = -EFAULT; | |
5228 | + if (rc) | |
5229 | + break; | |
5230 | + hd_geo.start = dos_prv->start_sect; | |
5231 | + if (copy_to_user | |
5232 | + ((void *) arg, &hd_geo, | |
5233 | + sizeof (struct hd_geometry))) | |
5234 | + rc = -EFAULT; | |
5235 | + } | |
5236 | + break; | |
5237 | + case EVMS_GET_BMAP: | |
5238 | + { | |
5239 | + struct evms_get_bmap_pkt *bmap = | |
5240 | + (struct evms_get_bmap_pkt *) arg; | |
5241 | + bmap->rsector += dos_prv->start_sect; | |
5242 | + /* intentionally fall thru to | |
5243 | + * default ioctl down to device | |
5244 | + * manager. | |
5245 | + */ | |
5246 | + } | |
5247 | + default: | |
5248 | + rc = IOCTL(dos_prv->source_disk, inode, file, cmd, arg); | |
5249 | + } | |
5250 | + return rc; | |
5251 | +} | |
5252 | + | |
5253 | +/* | |
5254 | + * Function: dos_part_init | |
5255 | + * | |
5256 | + */ | |
5257 | +static int __init | |
5258 | +dos_part_init(void) | |
5259 | +{ | |
5260 | + return evms_cs_register_plugin(&plugin_header); /* register with EVMS */ | |
5261 | +} | |
5262 | + | |
/*
 * Function: dos_part_exit
 *
 * Module exit point: removes this plugin's registration from EVMS.
 */
static void __exit
dos_part_exit(void)
{
	evms_cs_unregister_plugin(&plugin_header);
}
5268 | + | |
/* hook the plugin's entry/exit points into the module loader */
module_init(dos_part_init);
module_exit(dos_part_exit);
/* MODULE_LICENSE is only emitted where the kernel headers define it */
#ifdef MODULE_LICENSE
MODULE_LICENSE("GPL");
#endif
5274 | diff -Naur linux-2002-09-30/drivers/evms/evms.c evms-2002-09-30/drivers/evms/evms.c | |
5275 | --- linux-2002-09-30/drivers/evms/evms.c Wed Dec 31 18:00:00 1969 | |
5276 | +++ evms-2002-09-30/drivers/evms/evms.c Thu Sep 26 11:55:45 2002 | |
5277 | @@ -0,0 +1,5865 @@ | |
5278 | +/* -*- linux-c -*- */ | |
5279 | +/* | |
5280 | + * | |
5281 | + * | |
5282 | + * Copyright (c) International Business Machines Corp., 2000 | |
5283 | + * | |
5284 | + * This program is free software; you can redistribute it and/or modify | |
5285 | + * it under the terms of the GNU General Public License as published by | |
5286 | + * the Free Software Foundation; either version 2 of the License, or | |
5287 | + * (at your option) any later version. | |
5288 | + * | |
5289 | + * This program is distributed in the hope that it will be useful, | |
5290 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5291 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
5292 | + * the GNU General Public License for more details. | |
5293 | + * | |
5294 | + * You should have received a copy of the GNU General Public License | |
5295 | + * along with this program; if not, write to the Free Software | |
5296 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
5297 | + * | |
5298 | + * | |
5299 | + */ | |
5300 | +/* | |
5301 | + * | |
5302 | + * linux/drivers/evms/evms.c | |
5303 | + * | |
5304 | + * EVMS Base and Common Services | |
5305 | + * | |
5306 | + */ | |
5307 | + | |
5308 | +#define DEVICE_NR(device) MINOR(device) /* evms has no partition bits */ | |
5309 | +#define DEVICE_NAME "evms" /* name for messaging */ | |
5310 | +#define DEVICE_NO_RANDOM /* no entropy to contribute */ | |
5311 | +#define DEVICE_OFF(d) /* do nothing */ | |
5312 | + | |
5313 | +//#define LOCAL_DEBUG 1 | |
5314 | + | |
5315 | +#include <linux/config.h> | |
5316 | +#include <linux/module.h> | |
5317 | +#include <linux/errno.h> | |
5318 | +#include <linux/kernel.h> | |
5319 | +#include <linux/init.h> | |
5320 | +#include <linux/fs.h> | |
5321 | +#include <linux/slab.h> | |
5322 | +#include <asm/uaccess.h> | |
5323 | +#include <linux/blk.h> /* must be included by all block drivers */ | |
5324 | +#include <linux/blkdev.h> | |
5325 | +#include <linux/blkpg.h> | |
5326 | +#include <linux/iobuf.h> | |
5327 | +#include <linux/genhd.h> | |
5328 | +#include <linux/sched.h> | |
5329 | +#include <linux/completion.h> | |
5330 | +#include <linux/version.h> | |
5331 | +#include <linux/swap.h> | |
5332 | +#include <net/checksum.h> | |
5333 | +#include <linux/sysctl.h> | |
5334 | +#include <linux/smp_lock.h> | |
5335 | +#include <linux/reboot.h> | |
5336 | +#include <linux/compiler.h> | |
5337 | +#include <linux/evms/evms.h> | |
5338 | + | |
5339 | +//#define VFS_PATCH_PRESENT | |
5340 | + | |
5341 | +/* prefix used in logging messages */ | |
5342 | +#define LOG_PREFIX | |
5343 | + | |
/* One entry in the singly linked list of plugins registered with EVMS. */
struct evms_registered_plugin {
	struct evms_plugin_header *plugin;	/* the plugin's header */
	struct evms_registered_plugin *next;	/* next entry, NULL at end */
};
/* head of the registered plugin list; empty until a plugin registers */
static struct evms_registered_plugin *registered_plugin_head = NULL;
5349 | + | |
5350 | +static struct evms_list_node *evms_global_device_list = NULL; | |
5351 | +static struct evms_list_node *evms_global_feature_node_list = NULL; | |
5352 | +static struct evms_list_node *evms_global_notify_list = NULL; | |
5353 | + | |
5354 | +int evms_info_level = EVMS_INFO_LEVEL; | |
5355 | +struct proc_dir_entry *evms_proc_dir = NULL; | |
5356 | +EXPORT_SYMBOL(evms_info_level); | |
5357 | +static struct evms_logical_volume *evms_logical_volumes; | |
5358 | +static int evms_volumes = 0; | |
5359 | +/* a few variables to aid in detecting memory leaks. | |
5360 | + * these variables are always in use, regardless of | |
5361 | + * the state of EVMS_MEM_DEBUG. | |
5362 | + */ | |
5363 | +static atomic_t evms_allocs = (atomic_t) ATOMIC_INIT(0); | |
5364 | +static atomic_t evms_logical_nodes = (atomic_t) ATOMIC_INIT(0); | |
5365 | + | |
5366 | +u8 *evms_primary_string = "primary"; | |
5367 | +EXPORT_SYMBOL(evms_primary_string); | |
5368 | +u8 *evms_secondary_string = "secondary"; | |
5369 | +EXPORT_SYMBOL(evms_secondary_string); | |
5370 | + | |
5371 | +static struct evms_version evms_svc_version = { | |
5372 | + .major = EVMS_COMMON_SERVICES_MAJOR, | |
5373 | + .minor = EVMS_COMMON_SERVICES_MINOR, | |
5374 | + .patchlevel = EVMS_COMMON_SERVICES_PATCHLEVEL | |
5375 | +}; | |
5376 | + | |
5377 | +/* Handles for "private" EVMS object pools */ | |
5378 | +static struct evms_pool_mgmt *evms_io_notify_pool; | |
5379 | + | |
5380 | +/* Handles for "public" EVMS object pools */ | |
5381 | +struct evms_pool_mgmt *evms_bh_pool; | |
5382 | +EXPORT_SYMBOL(evms_bh_pool); | |
5383 | + | |
5384 | +/* Handle for the devfs directory entry */ | |
5385 | +devfs_handle_t evms_dir_devfs_handle; | |
5386 | +devfs_handle_t evms_blk_devfs_handle; | |
5387 | + | |
5388 | +/**********************************************************/ | |
5389 | +/* SYSCTL - EVMS folder */ | |
5390 | +/**********************************************************/ | |
5391 | + | |
5392 | +#ifdef CONFIG_PROC_FS | |
/* sysctl registration handle (assigned outside this part of the file) */
static struct ctl_table_header *evms_table_header;
/* limits handed to the evms_info_level entry below; presumably these
 * are the min/max enforced by proc_dointvec_minmax -- confirm against
 * the ctl_table field order.
 */
static int evms_info_level_min = EVMS_INFO_CRITICAL;
static int evms_info_level_max = EVMS_INFO_EVERYTHING;

/* leaf table: the "evms_info_level" integer, mode 0644 */
static ctl_table evms_table[] = {
	{DEV_EVMS_INFO_LEVEL, "evms_info_level",
	 &evms_info_level, sizeof (int), 0644, NULL,
	 &proc_dointvec_minmax, &sysctl_intvec,
	 NULL, &evms_info_level_min, &evms_info_level_max},
	{0}
};

/* "evms" directory (mode 0555) whose child is evms_table */
static ctl_table evms_dir_table[] = {
	{DEV_EVMS, "evms", NULL, 0, 0555, evms_table},
	{0}
};

/* "dev" directory (mode 0555) whose child is evms_dir_table */
static ctl_table dev_dir_table[] = {
	{CTL_DEV, "dev", NULL, 0, 0555, evms_dir_table},
	{0}
};
5414 | +#endif | |
5415 | + | |
5416 | +/**********************************************************/ | |
5417 | +/* START -- arch ioctl32 support */ | |
5418 | +/**********************************************************/ | |
5419 | +#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) | |
5420 | +#include <linux/evms/evms_bbr_k.h> | |
5421 | +#include <linux/raid/md.h> | |
5422 | + | |
5423 | +extern asmlinkage long | |
5424 | +sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); | |
5425 | + | |
5426 | +extern int | |
5427 | +register_ioctl32_conversion(unsigned int cmd, void *handler); | |
5428 | + | |
5429 | +extern int | |
5430 | +unregister_ioctl32_conversion(unsigned int cmd); | |
5431 | + | |
5432 | +#define uvirt_to_kernel(__x) ((unsigned long)(__x)) | |
5433 | +typedef unsigned int __uvirt_addr; | |
5434 | + | |
/*
 * 32-bit layouts of the EVMS ioctl packets, used by the conversion
 * handlers in this section.  Pointer members are carried as
 * __uvirt_addr (an unsigned int) and widened with uvirt_to_kernel()
 * when the native packet is built.
 */

/* 32-bit form of the packet translated by evms_sector_io() */
struct evms_sector_io32 {
	u64 disk_handle;
	s32 io_flag;
	u64 starting_sector;
	u64 sector_count;
	__uvirt_addr buffer_address;	/* 32-bit user pointer */
	s32 status;			/* copied back after the ioctl */
};

/* 32-bit form of the packet translated by evms_rediscover() */
struct evms_rediscover32 {
	s32 status;			/* copied back after the ioctl */
	u32 drive_count;
	__uvirt_addr drive_array;	/* 32-bit user pointer */
};

/* 32-bit form of the packet translated by evms_compute_csum() */
struct evms_compute_csum32 {
	__uvirt_addr buffer_address;	/* 32-bit user pointer */
	s32 buffer_size;
	u32 insum;
	u32 outsum;			/* copied back after the ioctl */
	s32 status;			/* copied back after the ioctl */
};

/* 32-bit form of the packet translated by evms_plugin_ioctl() */
struct evms_plugin_ioctl32 {
	u32 feature_id;			/* selects the per-plugin handler */
	s32 feature_command;
	s32 status;
	__uvirt_addr feature_ioctl_data;	/* 32-bit user pointer */
};

/* 32-bit form of the BBR packet translated by evms_bbr_plugin_ioctl() */
struct evms_notify_bbr32 {
	char object_name[EVMS_VOLUME_NAME_SIZE+1];
	u64 count;
	u64 start_sect;
	u64 nr_sect;			/* copied back after the ioctl */
	__uvirt_addr buffer;		/* 32-bit user pointer */
	s32 rw;
};
5473 | + | |
/* plugin number used to build EVMS_MD_PLUGIN_ID below */
#define EVMS_MD_ID 4
/* MD command values; evms_md_plugin_ioctl() compares the packet's
 * feature_command against EVMS_MD_PERS_IOCTL_CMD and
 * EVMS_MD_GET_ARRAY_INFO.
 */
#define EVMS_MD_PERS_IOCTL_CMD 1
#define EVMS_MD_ADD 2
#define EVMS_MD_REMOVE 3
#define EVMS_MD_ACTIVATE 4
#define EVMS_MD_DEACTIVATE 5
#define EVMS_MD_GET_ARRAY_INFO 6
/* compared against evms_md_ioctl.cmd for EVMS_MD_PERS_IOCTL_CMD */
#define EVMS_MD_RAID5_INIT_IO 1

/* native MD ioctl packet, as built by evms_md_plugin_ioctl() */
struct evms_md_ioctl {
	int mddev_idx;
	int cmd;
	void *arg;
};

/* 32-bit layout of struct evms_md_ioctl */
struct evms_md_ioctl32 {
	u32 mddev_idx;
	u32 cmd;
	__uvirt_addr arg;	/* 32-bit user pointer */
};

/* native argument of EVMS_MD_GET_ARRAY_INFO */
struct evms_md_array_info {
	unsigned long state;
	mdp_super_t *sb;
};

/* 32-bit layout of struct evms_md_array_info */
struct evms_md_array_info32 {
	u32 state;
	__uvirt_addr sb;	/* 32-bit user pointer */
};

/* native argument of EVMS_MD_RAID5_INIT_IO */
struct raid5_ioctl_init_io {
	int rw;
	u64 lsn;
	u64 nr_sects;
	void *data;
};

/* 32-bit layout of struct raid5_ioctl_init_io */
struct raid5_ioctl_init_io32 {
	s32 rw;
	u64 lsn;
	u64 nr_sects;
	__uvirt_addr data;	/* 32-bit user pointer */
};
5518 | + | |
5519 | +#define EVMS_MD_PLUGIN_ID ((IBM_OEM_ID << 16) | \ | |
5520 | + (EVMS_REGION_MANAGER << 12) | EVMS_MD_ID) | |
5521 | +#define EVMS_BBR_PLUGIN_ID ((IBM_OEM_ID << 16) | \ | |
5522 | + (EVMS_FEATURE << 12) | EVMS_BBR_FEATURE_ID) | |
5523 | + | |
5524 | +#define EVMS_SECTOR_IO_32 _IOWR(EVMS_MAJOR, \ | |
5525 | + EVMS_SECTOR_IO_NUMBER, \ | |
5526 | + struct evms_sector_io32) | |
5527 | +#define EVMS_REDISCOVER_VOLUMES_32 _IOWR(EVMS_MAJOR, \ | |
5528 | + EVMS_REDISCOVER_VOLUMES_NUMBER, \ | |
5529 | + struct evms_rediscover32) | |
5530 | +#define EVMS_COMPUTE_CSUM_32 _IOWR(EVMS_MAJOR, \ | |
5531 | + EVMS_COMPUTE_CSUM_NUMBER, \ | |
5532 | + struct evms_compute_csum32) | |
5533 | +#define EVMS_PLUGIN_IOCTL_32 _IOR(EVMS_MAJOR, \ | |
5534 | + EVMS_PLUGIN_IOCTL_NUMBER, \ | |
5535 | + struct evms_plugin_ioctl32) | |
5536 | + | |
5537 | +static int evms_sector_io(unsigned int fd, | |
5538 | + unsigned int cmd, | |
5539 | + unsigned long arg) | |
5540 | +{ | |
5541 | + mm_segment_t old_fs = get_fs(); | |
5542 | + struct evms_sector_io32 parms32; | |
5543 | + struct evms_sector_io_pkt parms; | |
5544 | + unsigned int kcmd; | |
5545 | + void *karg; | |
5546 | + int rc = 0; | |
5547 | + | |
5548 | + if (copy_from_user(&parms32, (struct evms_sector_io32 *)arg, | |
5549 | + sizeof(struct evms_sector_io32))) | |
5550 | + return -EFAULT; | |
5551 | + | |
5552 | + parms.disk_handle = parms32.disk_handle; | |
5553 | + parms.io_flag = parms32.io_flag; | |
5554 | + parms.starting_sector = parms32.starting_sector; | |
5555 | + parms.sector_count = parms32.sector_count; | |
5556 | + parms.buffer_address = (u8 *)uvirt_to_kernel(parms32.buffer_address); | |
5557 | + parms.status = 0; | |
5558 | + | |
5559 | + kcmd = EVMS_SECTOR_IO; | |
5560 | + karg = &parms; | |
5561 | + | |
5562 | + set_fs(KERNEL_DS); | |
5563 | + rc = sys_ioctl(fd, kcmd, (unsigned long)karg); | |
5564 | + set_fs(old_fs); | |
5565 | + | |
5566 | + parms32.status = parms.status; | |
5567 | + | |
5568 | + if (copy_to_user((struct evms_sector_io32 *)arg, &parms32, | |
5569 | + sizeof(struct evms_sector_io32))) | |
5570 | + return -EFAULT; | |
5571 | + | |
5572 | + return rc; | |
5573 | +} | |
5574 | + | |
5575 | +static int evms_rediscover(unsigned int fd, | |
5576 | + unsigned int cmd, | |
5577 | + unsigned long arg) | |
5578 | +{ | |
5579 | + mm_segment_t old_fs = get_fs(); | |
5580 | + struct evms_rediscover32 parms32; | |
5581 | + struct evms_rediscover_pkt parms; | |
5582 | + unsigned int kcmd; | |
5583 | + void *karg; | |
5584 | + int rc = 0; | |
5585 | + | |
5586 | + if (copy_from_user(&parms32, (struct evms_rediscover32 *)arg, | |
5587 | + sizeof(struct evms_rediscover32))) | |
5588 | + return -EFAULT; | |
5589 | + | |
5590 | + parms.drive_count = parms32.drive_count; | |
5591 | + parms.drive_array = (void *)uvirt_to_kernel(parms32.drive_array); | |
5592 | + parms.status = 0; | |
5593 | + | |
5594 | + kcmd = EVMS_REDISCOVER_VOLUMES; | |
5595 | + karg = &parms; | |
5596 | + | |
5597 | + set_fs(KERNEL_DS); | |
5598 | + rc = sys_ioctl(fd, kcmd, (unsigned long)karg); | |
5599 | + set_fs(old_fs); | |
5600 | + | |
5601 | + parms32.status = parms.status; | |
5602 | + | |
5603 | + if (copy_to_user((struct evms_rediscover32 *)arg, &parms32, | |
5604 | + sizeof(struct evms_rediscover32))) | |
5605 | + return -EFAULT; | |
5606 | + | |
5607 | + return rc; | |
5608 | +} | |
5609 | + | |
5610 | +static int evms_compute_csum(unsigned int fd, | |
5611 | + unsigned int cmd, | |
5612 | + unsigned long arg) | |
5613 | +{ | |
5614 | + mm_segment_t old_fs = get_fs(); | |
5615 | + struct evms_compute_csum32 parms32; | |
5616 | + struct evms_compute_csum_pkt parms; | |
5617 | + unsigned int kcmd; | |
5618 | + void *karg; | |
5619 | + int rc = 0; | |
5620 | + | |
5621 | + if (copy_from_user(&parms32, (struct evms_compute_csum32 *)arg, | |
5622 | + sizeof(struct evms_compute_csum32))) | |
5623 | + return -EFAULT; | |
5624 | + | |
5625 | + parms.insum = parms32.insum; | |
5626 | + parms.outsum = parms32.outsum; | |
5627 | + parms.buffer_size = parms32.buffer_size; | |
5628 | + parms.buffer_address = (void *)uvirt_to_kernel(parms32.buffer_address); | |
5629 | + parms.status = 0; | |
5630 | + | |
5631 | + kcmd = EVMS_COMPUTE_CSUM; | |
5632 | + karg = &parms; | |
5633 | + | |
5634 | + set_fs(KERNEL_DS); | |
5635 | + rc = sys_ioctl(fd, kcmd, (unsigned long)karg); | |
5636 | + set_fs(old_fs); | |
5637 | + | |
5638 | + parms32.status = parms.status; | |
5639 | + parms32.outsum = parms.outsum; | |
5640 | + | |
5641 | + if (copy_to_user((struct evms_compute_csum32 *)arg, &parms32, | |
5642 | + sizeof(struct evms_compute_csum32))) | |
5643 | + return -EFAULT; | |
5644 | + | |
5645 | + return rc; | |
5646 | +} | |
5647 | + | |
5648 | +static int evms_bbr_plugin_ioctl(unsigned int fd, | |
5649 | + unsigned int cmd, | |
5650 | + unsigned long arg) | |
5651 | +{ | |
5652 | + mm_segment_t old_fs = get_fs(); | |
5653 | + struct evms_notify_bbr32 bbr_parms32; | |
5654 | + struct evms_notify_bbr bbr_parms; | |
5655 | + struct evms_plugin_ioctl_pkt *parms = | |
5656 | + (struct evms_plugin_ioctl_pkt *)arg; | |
5657 | + void *old_ptr = NULL; | |
5658 | + int rc; | |
5659 | + | |
5660 | + if (copy_from_user(&bbr_parms32, | |
5661 | + (struct evms_notify_bbr32 *)parms->feature_ioctl_data, | |
5662 | + sizeof(struct evms_notify_bbr32))) | |
5663 | + return -EFAULT; | |
5664 | + | |
5665 | + memcpy(&bbr_parms, &bbr_parms32, sizeof(struct evms_notify_bbr32)); | |
5666 | + bbr_parms.buffer = (void *)uvirt_to_kernel(bbr_parms32.buffer); | |
5667 | + bbr_parms.rw = bbr_parms32.rw; | |
5668 | + old_ptr = parms->feature_ioctl_data; | |
5669 | + parms->feature_ioctl_data = &bbr_parms; | |
5670 | + | |
5671 | + set_fs(KERNEL_DS); | |
5672 | + rc = sys_ioctl(fd, cmd, arg); | |
5673 | + set_fs(old_fs); | |
5674 | + | |
5675 | + parms->feature_ioctl_data = old_ptr; | |
5676 | + | |
5677 | + if (!rc) { | |
5678 | + bbr_parms32.nr_sect = bbr_parms.nr_sect; | |
5679 | + rc = copy_to_user((struct evms_notify_bbr32 *)parms->feature_ioctl_data, | |
5680 | + &bbr_parms32, | |
5681 | + sizeof(struct evms_notify_bbr32)); | |
5682 | + } | |
5683 | + | |
5684 | + return rc; | |
5685 | +} | |
5686 | + | |
5687 | +static int evms_md_plugin_ioctl(unsigned int fd, | |
5688 | + unsigned int cmd, | |
5689 | + unsigned long arg) | |
5690 | +{ | |
5691 | + mm_segment_t old_fs = get_fs(); | |
5692 | + void *old_ptr = NULL; | |
5693 | + void *old_md_ptr = NULL; | |
5694 | + struct evms_md_ioctl32 md_parms32; | |
5695 | + struct evms_md_ioctl md_parms; | |
5696 | + struct evms_md_array_info32 md_array_parms32; | |
5697 | + struct evms_md_array_info md_array_parms; | |
5698 | + struct raid5_ioctl_init_io32 r5_init_io_parms32; | |
5699 | + struct raid5_ioctl_init_io r5_init_io_parms; | |
5700 | + struct evms_plugin_ioctl_pkt *parms = | |
5701 | + (struct evms_plugin_ioctl_pkt *)arg; | |
5702 | + int rc; | |
5703 | + | |
5704 | + if (copy_from_user(&md_parms32, | |
5705 | + (struct evms_md_ioctl*)parms->feature_ioctl_data, | |
5706 | + sizeof(struct evms_md_ioctl32))) | |
5707 | + return -EFAULT; | |
5708 | + | |
5709 | + md_parms.mddev_idx = md_parms32.mddev_idx; | |
5710 | + md_parms.cmd = md_parms32.cmd; | |
5711 | + md_parms.arg = (void *)uvirt_to_kernel(md_parms32.arg); | |
5712 | + old_ptr = parms->feature_ioctl_data; | |
5713 | + parms->feature_ioctl_data = &md_parms; | |
5714 | + | |
5715 | + if (parms->feature_command == EVMS_MD_GET_ARRAY_INFO) { | |
5716 | + if (copy_from_user(&md_array_parms32, | |
5717 | + (struct evms_md_array_info32*)md_parms.arg, | |
5718 | + sizeof(struct evms_md_array_info32))) | |
5719 | + return -EFAULT; | |
5720 | + | |
5721 | + md_array_parms.state = md_array_parms32.state; | |
5722 | + md_array_parms.sb = | |
5723 | + (void *)uvirt_to_kernel(md_array_parms32.sb); | |
5724 | + old_md_ptr = (void *)md_parms.arg; | |
5725 | + md_parms.arg = &md_array_parms; | |
5726 | + } else if (parms->feature_command == EVMS_MD_PERS_IOCTL_CMD) { | |
5727 | + if (md_parms.cmd == EVMS_MD_RAID5_INIT_IO) { | |
5728 | + if (copy_from_user(&r5_init_io_parms32, | |
5729 | + (struct raid5_ioctl_init_io32*)md_parms.arg, | |
5730 | + sizeof(struct raid5_ioctl_init_io32))) | |
5731 | + return -EFAULT; | |
5732 | + | |
5733 | + r5_init_io_parms.rw = r5_init_io_parms32.rw; | |
5734 | + r5_init_io_parms.lsn = r5_init_io_parms32.lsn; | |
5735 | + r5_init_io_parms.nr_sects = r5_init_io_parms32.nr_sects; | |
5736 | + r5_init_io_parms.data = | |
5737 | + (void *)uvirt_to_kernel(r5_init_io_parms32.data); | |
5738 | + old_md_ptr = (void *)md_parms.arg; | |
5739 | + md_parms.arg = &r5_init_io_parms; | |
5740 | + } | |
5741 | + } | |
5742 | + | |
5743 | + set_fs(KERNEL_DS); | |
5744 | + rc = sys_ioctl(fd, cmd, arg); | |
5745 | + set_fs(old_fs); | |
5746 | + | |
5747 | + parms->feature_ioctl_data = old_ptr; | |
5748 | + md_parms.arg = old_md_ptr; | |
5749 | + | |
5750 | + if (!rc) { | |
5751 | + if (parms->feature_command == EVMS_MD_GET_ARRAY_INFO) { | |
5752 | + md_array_parms32.state = md_array_parms.state; | |
5753 | + rc = copy_to_user((struct evms_md_array_info32 *)md_parms.arg, | |
5754 | + &md_array_parms32, | |
5755 | + sizeof(struct evms_md_array_info32)); | |
5756 | + } | |
5757 | + if (!rc) { | |
5758 | + md_parms32.mddev_idx = md_parms.mddev_idx; | |
5759 | + rc = copy_to_user((struct evms_md_ioctl*)parms->feature_ioctl_data, | |
5760 | + &md_parms32, | |
5761 | + sizeof(struct evms_md_ioctl32)); | |
5762 | + } | |
5763 | + } | |
5764 | + | |
5765 | + return rc; | |
5766 | +} | |
5767 | + | |
5768 | +static int evms_plugin_ioctl(unsigned int fd, | |
5769 | + unsigned int cmd, | |
5770 | + unsigned long arg) | |
5771 | +{ | |
5772 | + mm_segment_t old_fs = get_fs(); | |
5773 | + struct evms_plugin_ioctl32 parms32; | |
5774 | + struct evms_plugin_ioctl_pkt parms; | |
5775 | + unsigned int kcmd; | |
5776 | + void *karg; | |
5777 | + int rc; | |
5778 | + | |
5779 | + if (copy_from_user(&parms32, (struct evms_plugin_ioctl32 *)arg, | |
5780 | + sizeof(struct evms_plugin_ioctl32))) | |
5781 | + return -EFAULT; | |
5782 | + | |
5783 | + parms.feature_id = parms32.feature_id; | |
5784 | + parms.feature_command = parms32.feature_command; | |
5785 | + parms.status = parms32.status; | |
5786 | + parms.feature_ioctl_data = | |
5787 | + (void *)uvirt_to_kernel(parms32.feature_ioctl_data); | |
5788 | + | |
5789 | + kcmd = EVMS_PLUGIN_IOCTL; | |
5790 | + karg = &parms; | |
5791 | + | |
5792 | + switch (parms.feature_id) { | |
5793 | + case EVMS_MD_PLUGIN_ID: | |
5794 | + rc = evms_md_plugin_ioctl(fd, kcmd, (unsigned long)karg); | |
5795 | + break; | |
5796 | + case EVMS_BBR_PLUGIN_ID: | |
5797 | + rc = evms_bbr_plugin_ioctl(fd, kcmd, (unsigned long)karg); | |
5798 | + break; | |
5799 | + default: | |
5800 | + set_fs(KERNEL_DS); | |
5801 | + rc = sys_ioctl(fd, kcmd, (unsigned long)karg); | |
5802 | + set_fs(old_fs); | |
5803 | + } | |
5804 | + | |
5805 | + if (!rc) { | |
5806 | + parms32.status = parms.status; | |
5807 | + rc = copy_to_user((struct evms_plugin_ioctl32 *)arg, &parms32, | |
5808 | + sizeof(struct evms_plugin_ioctl32)); | |
5809 | + } | |
5810 | + | |
5811 | + return rc; | |
5812 | +} | |
5813 | +#endif | |
5814 | + | |
5815 | +/**********************************************************/ | |
5816 | +/* START -- exported functions/Common Services */ | |
5817 | +/**********************************************************/ | |
5818 | + | |
/*
 * Function: evms_cs_get_version
 * Description: This function returns the current EVMS version
 *
 * Stores EVMS_MAJOR_VERSION through major and EVMS_MINOR_VERSION
 * through minor.  Both pointers are written unconditionally, so
 * neither may be NULL.
 */
void
evms_cs_get_version(int *major, int *minor)
{
	*major = EVMS_MAJOR_VERSION;
	*minor = EVMS_MINOR_VERSION;
}
5829 | + | |
5830 | +EXPORT_SYMBOL(evms_cs_get_version); | |
5831 | + | |
5832 | +int | |
5833 | +evms_cs_check_version(struct evms_version *required, | |
5834 | + struct evms_version *actual) | |
5835 | +{ | |
5836 | + if (required->major != actual->major) | |
5837 | + return -EINVAL; | |
5838 | + else if (required->minor > actual->minor) | |
5839 | + return -EINVAL; | |
5840 | + else if (required->minor == actual->minor) | |
5841 | + if (required->patchlevel > actual->patchlevel) | |
5842 | + return -EINVAL; | |
5843 | + return 0; | |
5844 | +} | |
5845 | + | |
5846 | +EXPORT_SYMBOL(evms_cs_check_version); | |
5847 | + | |
5848 | +int | |
5849 | +evms_cs_allocate_logical_node(struct evms_logical_node **pp) | |
5850 | +{ | |
5851 | + *pp = kmalloc(sizeof (struct evms_logical_node), GFP_KERNEL); | |
5852 | + if (*pp) { | |
5853 | + memset(*pp, 0, sizeof (struct evms_logical_node)); | |
5854 | + atomic_inc(&evms_logical_nodes); | |
5855 | + return 0; | |
5856 | + } | |
5857 | + return -ENOMEM; | |
5858 | +} | |
5859 | + | |
5860 | +EXPORT_SYMBOL(evms_cs_allocate_logical_node); | |
5861 | + | |
5862 | +void | |
5863 | +evms_cs_deallocate_volume_info(struct evms_logical_node *p) | |
5864 | +{ | |
5865 | + if (p->iflags & EVMS_FEATURE_BOTTOM) { | |
5866 | + evms_cs_remove_item_from_list(&evms_global_feature_node_list, | |
5867 | + p); | |
5868 | + kfree(p->volume_info); | |
5869 | + p->volume_info = NULL; | |
5870 | + p->iflags &= ~EVMS_FEATURE_BOTTOM; | |
5871 | + } | |
5872 | +} | |
5873 | + | |
5874 | +EXPORT_SYMBOL(evms_cs_deallocate_volume_info); | |
5875 | + | |
5876 | +void | |
5877 | +evms_cs_deallocate_logical_node(struct evms_logical_node *p) | |
5878 | +{ | |
5879 | + if (p->next) { | |
5880 | + LOG_SERIOUS | |
5881 | + ("Deallocating object whose NEXT ptr is not null!!\n"); | |
5882 | + } | |
5883 | + evms_cs_deallocate_volume_info(p); | |
5884 | + if (p->feature_header) { | |
5885 | + kfree(p->feature_header); | |
5886 | + p->feature_header = NULL; | |
5887 | + } | |
5888 | + kfree(p); | |
5889 | + atomic_dec(&evms_logical_nodes); | |
5890 | +} | |
5891 | + | |
5892 | +EXPORT_SYMBOL(evms_cs_deallocate_logical_node); | |
5893 | + | |
5894 | +/* | |
5895 | + * Function: evms_cs_register_plugin | |
5896 | + * Description: This function is exported so that all plugins can register with EVMS | |
5897 | + */ | |
5898 | +int | |
5899 | +evms_cs_register_plugin(struct evms_plugin_header *plugin) | |
5900 | +{ | |
5901 | + int rc = 0; | |
5902 | + struct evms_registered_plugin *reg_record, **pp; | |
5903 | + struct evms_version *ver; | |
5904 | + | |
5905 | + ver = &plugin->required_services_version; | |
5906 | + | |
5907 | + LOG_EXTRA | |
5908 | + ("registering plugin (plugin.id=%d.%d.%d, plugin.ver=%d.%d.%d, req.svc.ver=%d.%d.%d)\n", | |
5909 | + GetPluginOEM(plugin->id), GetPluginType(plugin->id), | |
5910 | + GetPluginID(plugin->id), plugin->version.major, | |
5911 | + plugin->version.minor, plugin->version.patchlevel, ver->major, | |
5912 | + ver->minor, ver->patchlevel); | |
5913 | + | |
5914 | + /* check common services requirements */ | |
5915 | + rc = evms_cs_check_version(ver, &evms_svc_version); | |
5916 | + if (rc) { | |
5917 | + LOG_SERIOUS | |
5918 | + ("plugin failed to load: common services (vers:%d,%d,%d) incompatibility!\n", | |
5919 | + EVMS_COMMON_SERVICES_MAJOR, EVMS_COMMON_SERVICES_MINOR, | |
5920 | + EVMS_COMMON_SERVICES_PATCHLEVEL); | |
5921 | + } | |
5922 | + if (!rc) { | |
5923 | + /* ensure a plugin with this feature id is | |
5924 | + * not already loaded. | |
5925 | + */ | |
5926 | + for (pp = ®istered_plugin_head; *pp; pp = &(*pp)->next) { | |
5927 | + if ((*pp)->plugin->id == plugin->id) { | |
5928 | + rc = -EBUSY; | |
5929 | + LOG_ERROR | |
5930 | + ("error(%d) attempting to load another plugin with id(%x).\n", | |
5931 | + rc, plugin->id); | |
5932 | + } | |
5933 | + } | |
5934 | + } | |
5935 | + if (!rc) { | |
5936 | + /* ensure the plugin has provided functions for | |
5937 | + * the mandatory entry points. | |
5938 | + */ | |
5939 | + if (!plugin->fops->discover) { | |
5940 | + rc = -EINVAL; | |
5941 | + } else if (!plugin->fops->init_io) { | |
5942 | + rc = -EINVAL; | |
5943 | + } else if (!plugin->fops->ioctl) { | |
5944 | + rc = -EINVAL; | |
5945 | + } else if (!plugin->fops->read) { | |
5946 | + rc = -EINVAL; | |
5947 | + } else if (!plugin->fops->write) { | |
5948 | + rc = -EINVAL; | |
5949 | + } else if (!plugin->fops->delete) { | |
5950 | + rc = -EINVAL; | |
5951 | + } | |
5952 | + } | |
5953 | + if (!rc) { | |
5954 | + /* allocate a new plugin registration record */ | |
5955 | + reg_record = | |
5956 | + kmalloc(sizeof (struct evms_registered_plugin), GFP_KERNEL); | |
5957 | + if (!reg_record) { | |
5958 | + rc = -ENOMEM; | |
5959 | + } | |
5960 | + } | |
5961 | + if (!rc) { | |
5962 | + memset(reg_record, 0, sizeof (struct evms_registered_plugin)); | |
5963 | + /* store ptr to plugin header in new registration record */ | |
5964 | + reg_record->plugin = plugin; | |
5965 | + | |
5966 | + /* terminate the record */ | |
5967 | + reg_record->next = NULL; | |
5968 | + | |
5969 | + /* find end of the plugin registration list */ | |
5970 | + for (pp = ®istered_plugin_head; *pp; pp = &(*pp)->next) ; | |
5971 | + /* add registration record to list */ | |
5972 | + *pp = reg_record; | |
5973 | + | |
5974 | + /* increment the usage count */ | |
5975 | + MOD_INC_USE_COUNT; | |
5976 | + } | |
5977 | + | |
5978 | + return (rc); | |
5979 | +} | |
5980 | + | |
5981 | +EXPORT_SYMBOL(evms_cs_register_plugin); | |
5982 | + | |
5983 | +/* | |
5984 | + * Function: evms_cs_unregister_plugin | |
5985 | + * Description: This function is exported so that all plugins can | |
5986 | + * unregister with EVMS | |
5987 | + */ | |
5988 | +int | |
5989 | +evms_cs_unregister_plugin(struct evms_plugin_header *plugin) | |
5990 | +{ | |
5991 | + int rc = 0, found = FALSE; | |
5992 | + struct evms_registered_plugin **pp; | |
5993 | + struct evms_version *ver; | |
5994 | + | |
5995 | + ver = &plugin->required_services_version; | |
5996 | + | |
5997 | + LOG_EXTRA | |
5998 | + ("unregistering plugin (plugin.id=%d.%d.%d, plugin.ver=%d.%d.%d, req.svc.ver=%d.%d.%d)\n", | |
5999 | + GetPluginOEM(plugin->id), GetPluginType(plugin->id), | |
6000 | + GetPluginID(plugin->id), plugin->version.major, | |
6001 | + plugin->version.minor, plugin->version.patchlevel, ver->major, | |
6002 | + ver->minor, ver->patchlevel); | |
6003 | + /* ensure a plugin with this feature id is | |
6004 | + * currently loaded. | |
6005 | + */ | |
6006 | + for (pp = ®istered_plugin_head; *pp; pp = &(*pp)->next) { | |
6007 | + if ((*pp)->plugin->id == plugin->id) { | |
6008 | + found = TRUE; | |
6009 | + break; | |
6010 | + } | |
6011 | + } | |
6012 | + if (!found) { | |
6013 | + rc = -ENOPKG; | |
6014 | + LOG_ERROR | |
6015 | + ("error(%d) attempt to unload a non-loaded plugin with id(%x).\n", | |
6016 | + rc, plugin->id); | |
6017 | + } | |
6018 | + /* actually unload the plugin now */ | |
6019 | + if (!rc) { | |
6020 | + struct evms_registered_plugin *tmp = *pp; | |
6021 | + | |
6022 | + /* remove the plugin record from our | |
6023 | + * internal plugin list | |
6024 | + */ | |
6025 | + *pp = (*pp)->next; | |
6026 | + /* deallocate the plugin registration record | |
6027 | + */ | |
6028 | + kfree(tmp); | |
6029 | + | |
6030 | + /* decrement the usage count */ | |
6031 | + MOD_DEC_USE_COUNT; | |
6032 | + } | |
6033 | + return (rc); | |
6034 | +} | |
6035 | + | |
6036 | +EXPORT_SYMBOL(evms_cs_unregister_plugin); | |
6037 | + | |
6038 | +/* function: evms_cs_add_logical_node_to_list | |
6039 | + * | |
6040 | + * This functions adds a new logical node to the end of a | |
6041 | + * node list. | |
6042 | + * | |
6043 | + * NOTE: This function is only expected to be called at | |
6044 | + * discovery time, which is singled threaded by nature, | |
6045 | + * and therefore doesn't need to be made SMP safe. | |
6046 | + */ | |
6047 | +int | |
6048 | +evms_cs_add_logical_node_to_list(struct evms_logical_node **list_head, | |
6049 | + struct evms_logical_node *node) | |
6050 | +{ | |
6051 | + int rc = 0; | |
6052 | + struct evms_logical_node **pp = NULL; | |
6053 | + | |
6054 | + /* check to make sure node is not already on a list */ | |
6055 | + if (node->next) | |
6056 | + rc = 1; | |
6057 | + else | |
6058 | + /* check to make sure node being added is not already in the list */ | |
6059 | + for (pp = list_head; *pp; pp = &(*pp)->next) | |
6060 | + if (*pp == node) { | |
6061 | + rc = 2; | |
6062 | + break; | |
6063 | + } | |
6064 | + | |
6065 | + /* add node to the end of the list */ | |
6066 | + if (!rc) | |
6067 | + *pp = node; | |
6068 | + | |
6069 | + return (rc); | |
6070 | +} | |
6071 | + | |
6072 | +EXPORT_SYMBOL(evms_cs_add_logical_node_to_list); | |
6073 | + | |
6074 | +/* function: evms_cs_remove_logical_node_from_list | |
6075 | + * | |
6076 | + * This functions removes a new logical node from a node list. | |
6077 | + * | |
6078 | + * NOTE: This function is only expected to be called at | |
6079 | + * discovery time, which is singled threaded by nature, | |
6080 | + * and therefore doesn't need to be made SMP safe. | |
6081 | + */ | |
6082 | +int | |
6083 | +evms_cs_remove_logical_node_from_list(struct evms_logical_node **list_head, | |
6084 | + struct evms_logical_node *node) | |
6085 | +{ | |
6086 | + /* remove this node from the head of the list */ | |
6087 | + int rc = 1; /* assume failure until target node is found */ | |
6088 | + struct evms_logical_node **pp; | |
6089 | + for (pp = list_head; *pp; pp = &(*pp)->next) | |
6090 | + if (*pp == node) { | |
6091 | + *pp = (*pp)->next; | |
6092 | + node->next = NULL; | |
6093 | + rc = 0; | |
6094 | + break; | |
6095 | + } | |
6096 | + return (rc); | |
6097 | +} | |
6098 | + | |
6099 | +EXPORT_SYMBOL(evms_cs_remove_logical_node_from_list); | |
6100 | + | |
6101 | +int | |
6102 | +evms_cs_kernel_ioctl(struct evms_logical_node *node, unsigned int cmd, | |
6103 | + unsigned long arg) | |
6104 | +{ | |
6105 | + int rc = 0; | |
6106 | + struct inode tmp_inode; | |
6107 | + mm_segment_t fs; | |
6108 | + | |
6109 | + lock_kernel(); | |
6110 | + fs = get_fs(); | |
6111 | + set_fs(get_ds()); | |
6112 | + rc = IOCTL(node, &tmp_inode, NULL, cmd, arg); | |
6113 | + set_fs(fs); | |
6114 | + unlock_kernel(); | |
6115 | + | |
6116 | + return (rc); | |
6117 | + | |
6118 | +} | |
6119 | + | |
6120 | +EXPORT_SYMBOL(evms_cs_kernel_ioctl); | |
6121 | + | |
6122 | +/* | |
6123 | + * function: evms_cs_size_in_vsectors | |
6124 | + * | |
6125 | + * In EVMS a V(irtual)Sector is 512 bytes in size. | |
6126 | + * This function computes the number of VSECTORs an specified | |
6127 | + * item size would require. | |
6128 | + * | |
6129 | + * NOTE: This function has been coded to work with 64 bit values. | |
6130 | + */ | |
6131 | +unsigned long | |
6132 | +evms_cs_size_in_vsectors(long long item_size) | |
6133 | +{ | |
6134 | + long long sectors; | |
6135 | + | |
6136 | + sectors = item_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
6137 | + if (item_size & (EVMS_VSECTOR_SIZE - 1)) | |
6138 | + sectors++; | |
6139 | + | |
6140 | + return (sectors); | |
6141 | +} | |
6142 | + | |
6143 | +EXPORT_SYMBOL(evms_cs_size_in_vsectors); | |
6144 | + | |
/*
 * function: evms_cs_log2
 *
 * Computes the base-2 logarithm of a value that is an exact power
 * of 2.
 *
 * Return values:
 *   -1  the value is 0
 *   -2  the value is not a power of 2
 *   otherwise the exponent n such that value == 2^n
 */
int
evms_cs_log2(long long value)
{
	long long remaining = value;
	int exponent = 0;

	if (!value)
		return (-1);

	/* strip (and count) the zero bits below the lowest set bit */
	for (; !(remaining & 1); remaining >>= 1)
		exponent++;

	/* anything left besides that single bit: not a power of 2 */
	return ((remaining == 1) ? exponent : -2);
}
6172 | + | |
6173 | +EXPORT_SYMBOL(evms_cs_log2); | |
6174 | + | |
6175 | +/* | |
6176 | + * Functions: | |
6177 | + * | |
6178 | + * build_crc_table() | |
6179 | + * calculate_crc() | |
6180 | + * | |
6181 | + * | |
6182 | + * Description: The functions in this module provide a means of calculating | |
6183 | + * the 32 bit CRC for a block of data. build_crc_table must | |
6184 | + * be called to initialize this module. calculate_crc must | |
6185 | + * NOT be used until after build_crc_table has been called. | |
6186 | + * Once build_crc_table has been called, calculate_crc can | |
6187 | + * be used to calculate the crc of the data residing in a | |
6188 | + * user specified buffer. | |
6189 | + * | |
6190 | + */ | |
6191 | + | |
6192 | +#define CRC_POLYNOMIAL 0xEDB88320L | |
6193 | + | |
6194 | +static u32 crc_table[256]; | |
6195 | +static u32 crc_table_built = FALSE; | |
6196 | + | |
6197 | +/*********************************************************************/ | |
6198 | +/* */ | |
6199 | +/* Function Name: build_crc_table */ | |
6200 | +/* */ | |
6201 | +/* Descriptive Name: This module implements the crc function using */ | |
6202 | +/* a table driven method. The required table */ | |
6203 | +/* must be setup before the calculate_crc */ | |
6204 | +/* function can be used. This table only needs */ | |
6205 | +/* to be set up once. This function sets up the */ | |
6206 | +/* crc table needed by calculate_crc. */ | |
6207 | +/* */ | |
6208 | +/* Input: None */ | |
6209 | +/* */ | |
6210 | +/* Output: None */ | |
6211 | +/* */ | |
6212 | +/* Error Handling: N/A */ | |
6213 | +/* */ | |
6214 | +/* Side Effects: The internal crc table is initialized. */ | |
6215 | +/* */ | |
6216 | +/* Notes: None. */ | |
6217 | +/* */ | |
6218 | +/*********************************************************************/ | |
6219 | +static void | |
6220 | +build_crc_table(void) | |
6221 | +{ | |
6222 | + u32 i, j, crc; | |
6223 | + | |
6224 | + for (i = 0; i <= 255; i++) { | |
6225 | + crc = i; | |
6226 | + for (j = 8; j > 0; j--) { | |
6227 | + if (crc & 1) | |
6228 | + crc = (crc >> 1) ^ CRC_POLYNOMIAL; | |
6229 | + else | |
6230 | + crc >>= 1; | |
6231 | + } | |
6232 | + crc_table[i] = crc; | |
6233 | + } | |
6234 | + crc_table_built = TRUE; | |
6235 | +} | |
6236 | + | |
6237 | +/*********************************************************************/ | |
6238 | +/* */ | |
6239 | +/* Function Name: calculate_crc */ | |
6240 | +/* */ | |
6241 | +/* Descriptive Name: This function calculates the crc value for */ | |
6242 | +/* the data in the buffer specified by Buffer. */ | |
6243 | +/* */ | |
6244 | +/* Input: u32 crc : This is the starting crc. If you are */ | |
6245 | +/* starting a new crc calculation, then */ | |
6246 | +/* this should be set to 0xFFFFFFFF. If */ | |
6247 | +/* you are continuing a crc calculation */ | |
6248 | +/* (i.e. all of the data did not fit in */ | |
6249 | +/* the buffer so you could not calculate */ | |
6250 | +/* the crc in a single operation), then */ | |
6251 | +/* this is the crc output by the last */ | |
6252 | +/* calculate_crc call. */ | |
6253 | +/* */ | |
6254 | +/* Output: The crc for the data in the buffer, based upon the value*/ | |
6255 | +/* of the input parameter crc. */ | |
6256 | +/* */ | |
6257 | +/* Error Handling: None. */ | |
6258 | +/* */ | |
6259 | +/* Side Effects: None. */ | |
6260 | +/* */ | |
6261 | +/* Notes: None. */ | |
6262 | +/* */ | |
6263 | +/*********************************************************************/ | |
6264 | +u32 | |
6265 | +evms_cs_calculate_crc(u32 crc, void *buffer, u32 buffersize) | |
6266 | +{ | |
6267 | + unsigned char *current_byte; | |
6268 | + u32 temp1, temp2, i; | |
6269 | + | |
6270 | + current_byte = (unsigned char *) buffer; | |
6271 | + /* Make sure the crc table is available */ | |
6272 | + if (crc_table_built == FALSE) | |
6273 | + build_crc_table(); | |
6274 | + /* Process each byte in the buffer. */ | |
6275 | + for (i = 0; i < buffersize; i++) { | |
6276 | + temp1 = (crc >> 8) & 0x00FFFFFF; | |
6277 | + temp2 = | |
6278 | + crc_table[(crc ^ (u32) * | |
6279 | + current_byte) & (u32) 0xff]; | |
6280 | + current_byte++; | |
6281 | + crc = temp1 ^ temp2; | |
6282 | + } | |
6283 | + return (crc); | |
6284 | +} | |
6285 | + | |
6286 | +EXPORT_SYMBOL(evms_cs_calculate_crc); | |
6287 | + | |
/* io_notify_t flag bit; parenthesized so the macro expands safely
 * inside any larger expression.
 */
#define EVMS_ORIGINAL_CALLBACK_FLAG (1 << 0)
6289 | +typedef struct io_notify_s { | |
6290 | + unsigned int flags; | |
6291 | + void *private; | |
6292 | + struct buffer_head *bh; | |
6293 | + u64 rsector; | |
6294 | + kdev_t rdev; | |
6295 | + void *b_private; | |
6296 | + void (*callback_function) (struct evms_logical_node * node, | |
6297 | + struct buffer_head * bh, | |
6298 | + int uptodate, int *redrive); | |
6299 | + struct io_notify_s *next; | |
6300 | +} io_notify_t; | |
6301 | + | |
6302 | +struct evms_pool_mgmt * | |
6303 | +evms_cs_create_pool(int objsize, | |
6304 | + u8 * pool_name, | |
6305 | + void (*ctor) (void *, kmem_cache_t *, unsigned long), | |
6306 | + void (*dtor) (void *, kmem_cache_t *, unsigned long)) | |
6307 | +{ | |
6308 | + struct evms_pool_mgmt *pool; | |
6309 | + | |
6310 | + /* create the pool management structure */ | |
6311 | + pool = kmalloc(sizeof (struct evms_pool_mgmt), GFP_KERNEL); | |
6312 | + if (!pool) { | |
6313 | + LOG_CRITICAL("Cannot create %s fpool mgmt structure", | |
6314 | + pool_name); | |
6315 | + return NULL; | |
6316 | + } | |
6317 | + /* initialize various field in pool mgmt structure */ | |
6318 | + memset(pool, 0, sizeof (struct evms_pool_mgmt)); | |
6319 | + pool->member_size = objsize; | |
6320 | + pool->name = pool_name; | |
6321 | + pool->waiters = (atomic_t) ATOMIC_INIT(0); | |
6322 | + init_waitqueue_head(&pool->wait_queue); | |
6323 | + /* go create the pool */ | |
6324 | + pool->cachep = kmem_cache_create(pool->name, | |
6325 | + pool->member_size, | |
6326 | + 0, SLAB_HWCACHE_ALIGN, ctor, dtor); | |
6327 | + if (!pool->cachep) | |
6328 | + panic("Cannot create %s SLAB cache", pool->name); | |
6329 | + return (pool); | |
6330 | +} | |
6331 | + | |
6332 | +EXPORT_SYMBOL(evms_cs_create_pool); | |
6333 | + | |
6334 | +void * | |
6335 | +evms_cs_allocate_from_pool(struct evms_pool_mgmt *pool, int blockable) | |
6336 | +{ | |
6337 | + void *objp; | |
6338 | + | |
6339 | + while (1) { | |
6340 | + objp = kmem_cache_alloc(pool->cachep, SLAB_NOIO); | |
6341 | + if (objp || !blockable) { | |
6342 | + return (objp); | |
6343 | + } else { | |
6344 | + /* block and wait for an object to | |
6345 | + * be returned to the pool | |
6346 | + */ | |
6347 | + atomic_inc(&pool->waiters); | |
6348 | + wait_event(pool->wait_queue, | |
6349 | + (!atomic_read(&pool->waiters))); | |
6350 | + } | |
6351 | + } | |
6352 | + return (objp); | |
6353 | +} | |
6354 | + | |
6355 | +EXPORT_SYMBOL(evms_cs_allocate_from_pool); | |
6356 | + | |
6357 | +void | |
6358 | +evms_cs_deallocate_to_pool(struct evms_pool_mgmt *pool, void *objp) | |
6359 | +{ | |
6360 | + kmem_cache_free(pool->cachep, objp); | |
6361 | + atomic_set(&pool->waiters, 0); | |
6362 | + if (waitqueue_active(&pool->wait_queue)) { | |
6363 | + wake_up(&pool->wait_queue); | |
6364 | + } | |
6365 | +} | |
6366 | + | |
6367 | +EXPORT_SYMBOL(evms_cs_deallocate_to_pool); | |
6368 | + | |
6369 | +void | |
6370 | +evms_cs_destroy_pool(struct evms_pool_mgmt *pool) | |
6371 | +{ | |
6372 | + kmem_cache_destroy(pool->cachep); | |
6373 | + kfree(pool); | |
6374 | +} | |
6375 | + | |
6376 | +EXPORT_SYMBOL(evms_cs_destroy_pool); | |
6377 | + | |
6378 | +/* | |
6379 | + * function: evms_end_io | |
6380 | + * | |
6381 | + * This is a support function for | |
6382 | + * evms_cs_register_for_end_io_notification. | |
6383 | + * This function is called during I/O completion on any buffer | |
6384 | + * head that was registered by a plugin. Control is passed here | |
6385 | + * and this routine will, thru the use of the I/O notify entry | |
6386 | + * stored in the b_private field of the buffer head, restore | |
6387 | + * the b_rsector value the buffer head had at the time of | |
6388 | + * registration and pass control to the registered callback | |
6389 | + * address, with pointers to the buffer head and an optional | |
6390 | + * plugin private data. Upon completion of the callback, | |
6391 | + * control is returned back here. The io notify list entry | |
6392 | + * is deleted. This process repeats until this routine | |
6393 | + * detects that all registered plugins have been called back | |
6394 | + * and the buffer head's original end_io function has been | |
6395 | + * called. At this point the DONE flag is set, and we terminate | |
6396 | + * callback loop and exit. | |
6397 | + * | |
6398 | + * Plugins may desire to break or interrupt the callback | |
6399 | + * sequence or chain. This may be useful to redrive I/O or | |
6400 | + * to wait for other buffer heads to complete before | |
6401 | + * allowing the original buffer head callback to occur. | |
6402 | + * To interrupt the callback "chain", a registered | |
6403 | + * plugin's callback must return with the DONE flag set. | |
6404 | + * | |
6405 | + * NOTE: If a plugin set the DONE flag, and wishes to redrive | |
6406 | + * a buffer head, the plugin MUST reregister the buffer head | |
6407 | + * to receive another callback on this buffer head. Also, the | |
6408 | + * plugin MUST ensure that the original buffer head end_io | |
6409 | + * function get called at some point, either by reregistering | |
6410 | + * this buffer head and receiving another callback, or by | |
6411 | + * means of buffer head aggregation triggered by the callbacks | |
6412 | + * of other buffer heads. | |
6413 | + * | |
6414 | + */ | |
6415 | +static void | |
6416 | +evms_end_io(struct buffer_head *bh, int uptodate) | |
6417 | +{ | |
6418 | + io_notify_t *entry; | |
6419 | + int done; | |
6420 | + | |
6421 | + done = FALSE; | |
6422 | + while (!done) { | |
6423 | + /* retrieve the io_notify_entry ptr from | |
6424 | + * the b_private field in the buffer head. | |
6425 | + */ | |
6426 | + entry = (io_notify_t *) bh->b_private; | |
6427 | + | |
6428 | + /* restore the b_private value to | |
6429 | + * the previous b_private value (which | |
6430 | + * should be a previous io_notify_entry | |
6431 | + * or the original b_private pointer). | |
6432 | + */ | |
6433 | + bh->b_private = entry->b_private; | |
6434 | + | |
6435 | + /* check for original callback for this bh */ | |
6436 | + if (entry->flags & EVMS_ORIGINAL_CALLBACK_FLAG) { | |
6437 | + /* this is the original for bh */ | |
6438 | + | |
6439 | + /* turn off flag marking this as the original */ | |
6440 | + entry->flags &= ~EVMS_ORIGINAL_CALLBACK_FLAG; | |
6441 | + | |
6442 | + /* decrement volume's requests_in_progress var */ | |
6443 | + atomic_dec(&evms_logical_volumes[MINOR(bh->b_rdev)]. | |
6444 | + requests_in_progress); | |
6445 | + | |
6446 | + /* restore b_end_io to original value */ | |
6447 | + bh->b_end_io = (void *) entry->callback_function; | |
6448 | + if (bh->b_end_io) { | |
6449 | + /* invoke original callback function | |
6450 | + * if it exists. | |
6451 | + */ | |
6452 | + bh->b_end_io(bh, uptodate); | |
6453 | + } | |
6454 | + done = TRUE; | |
6455 | + } else { | |
6456 | + /* this is a plugin callback */ | |
6457 | + | |
6458 | + /* restore the rsector value to the | |
6459 | + * value at the time of callback | |
6460 | + * registration. | |
6461 | + */ | |
6462 | + bh->b_rsector = entry->rsector; | |
6463 | + bh->b_rdev = entry->rdev; | |
6464 | + /* invoke plugin callback function */ | |
6465 | + entry->callback_function(entry->private, bh, uptodate, | |
6466 | + &done); | |
6467 | + } | |
6468 | + /* free the io notify entry */ | |
6469 | + evms_cs_deallocate_to_pool(evms_io_notify_pool, entry); | |
6470 | + } | |
6471 | +} | |
6472 | + | |
6473 | +/* | |
6474 | + * function: evms_cs_register_for_end_io_notification | |
6475 | + * | |
6476 | + * This function is an evms common service. | |
6477 | + * This routine allows a (plugin) function to register to | |
6478 | + * participate in the io completion notification process. | |
6479 | + * This is useful for plugins which alter data after it | |
6480 | + * has been read from the disk (i.e. encryption or | |
6481 | + * compression). | |
6482 | + * | |
6483 | + * This routine also records the rsector value at the time | |
6484 | + * of registration, so that it can be restored to that value | |
6485 | + * prior to the callback to a plugin, thus allowing that | |
6486 | + * plugin to work with the value it had seen during the | |
6487 | + * initiating I/O request. | |
6488 | + * | |
6489 | + * This routine also records a private data pointer at the | |
6490 | + * time of registration, and is returned to the plugin | |
6491 | + * at callback time. This private data pointer was designed | |
6492 | + * to contain context/callback/buffer_head specific data, and | |
6493 | + * frees the plugin from having to store and find associated | |
6494 | + * data at the time of the callback. This field is not used | |
6495 | + * by this function and is optional (NULL if unused). It is | |
6496 | + * recorded and returned as a convenience for the plugins. | |
6497 | + * | |
6498 | + * DANGER!!! - WILL ROBINSON - DANGER!!! | |
6499 | + * This routine uses the b_private field in the | |
6500 | + * buffer_head structure. If any lower level driver uses this | |
6501 | + * field and do NOT restore it, the I/O callback will fail!! | |
6502 | + * | |
6503 | + * Any plugins writers requiring a field for private storage | |
6504 | + * should instead use the private field parameter in this | |
6505 | + * function to store their private data. | |
6506 | + * | |
6507 | + */ | |
6508 | + | |
6509 | +int | |
6510 | +evms_cs_register_for_end_io_notification(void *private, | |
6511 | + struct buffer_head *bh, | |
6512 | + void *callback_function) | |
6513 | +{ | |
6514 | + int rc = 0, done; | |
6515 | + io_notify_t *new_entry; | |
6516 | + | |
6517 | + done = FALSE; | |
6518 | + while (!done) { | |
6519 | + /* allocate a notify entry */ | |
6520 | + new_entry = | |
6521 | + evms_cs_allocate_from_pool(evms_io_notify_pool, | |
6522 | + EVMS_BLOCKABLE); | |
6523 | + if (!new_entry) { | |
6524 | + schedule(); | |
6525 | + continue; | |
6526 | + } | |
6527 | + | |
6528 | + /* initialize notify entry */ | |
6529 | + new_entry->private = private; | |
6530 | + new_entry->bh = bh; | |
6531 | + new_entry->rsector = bh->b_rsector; | |
6532 | + new_entry->rdev = bh->b_rdev; | |
6533 | + new_entry->b_private = bh->b_private; | |
6534 | + new_entry->flags = 0; | |
6535 | + | |
6536 | + /* is this the first callback for this bh? */ | |
6537 | + if (bh->b_end_io != evms_end_io) { | |
6538 | + /* yes, first callback */ | |
6539 | + new_entry->flags |= EVMS_ORIGINAL_CALLBACK_FLAG; | |
6540 | + new_entry->callback_function = (void *) bh->b_end_io; | |
6541 | + | |
6542 | + /* increment volume's requests_in_progress var */ | |
6543 | + atomic_inc(&evms_logical_volumes[MINOR(bh->b_rdev)]. | |
6544 | + requests_in_progress); | |
6545 | + | |
6546 | + /* set b_end_io so we get control */ | |
6547 | + bh->b_end_io = evms_end_io; | |
6548 | + } else { | |
6549 | + /* no, not first callback */ | |
6550 | + new_entry->callback_function = callback_function; | |
6551 | + done = TRUE; | |
6552 | + } | |
6553 | + /* set b_private to aid in quick lookup */ | |
6554 | + bh->b_private = new_entry; | |
6555 | + } | |
6556 | + return (rc); | |
6557 | +} | |
6558 | + | |
6559 | +EXPORT_SYMBOL(evms_cs_register_for_end_io_notification); | |
6560 | + | |
6561 | +/* function description: evms_cs_lookup_item_in_list | |
6562 | + * | |
6563 | + * this function searches for the specified item in the | |
6564 | + * specified node list. it returns the address of the | |
6565 | + * evms_list_node containing the specified item. | |
6566 | + */ | |
6567 | +struct evms_list_node ** | |
6568 | +evms_cs_lookup_item_in_list(struct evms_list_node **node_list, void *item) | |
6569 | +{ | |
6570 | + struct evms_list_node **list_node; | |
6571 | + | |
6572 | + list_node = node_list; | |
6573 | + while (*list_node) { | |
6574 | + if ((*list_node)->item == item) | |
6575 | + break; | |
6576 | + list_node = &(*list_node)->next; | |
6577 | + } | |
6578 | + return (list_node); | |
6579 | +} | |
6580 | + | |
6581 | +EXPORT_SYMBOL(evms_cs_lookup_item_in_list); | |
6582 | + | |
6583 | +/* function description: evms_add_item_to_list | |
6584 | + * | |
6585 | + * this function adds an item to the list. the | |
6586 | + * node for the new item is added to the end | |
6587 | + * of the list. the list is traversed to find the end. | |
6588 | + * while the traversal occurs, the list is checked | |
6589 | + * for the presence of the specified item. if already | |
6590 | + * present in the list, and error code is returned. | |
6591 | + */ | |
6592 | +/* function description: evms_cs_add_item_to_list | |
6593 | + * | |
6594 | + * this function adds an item to an item list. | |
6595 | + * | |
6596 | + * RC == 0 is returned for: | |
6597 | + * a successful add of a new item | |
6598 | + * | |
6599 | + * RC == 1 is returned when: | |
6600 | + * the item is already on the list | |
6601 | + * | |
6602 | + * RC < 0 is returned for an error attempting to add the item. | |
6603 | + */ | |
6604 | +int | |
6605 | +evms_cs_add_item_to_list(struct evms_list_node **list, void *item) | |
6606 | +{ | |
6607 | + int rc = 0; | |
6608 | + struct evms_list_node **list_node, *new_node; | |
6609 | + | |
6610 | + list_node = evms_cs_lookup_item_in_list(list, item); | |
6611 | + if (*list_node == NULL) { | |
6612 | + new_node = kmalloc(sizeof (struct evms_list_node), GFP_NOIO); | |
6613 | + if (new_node) { | |
6614 | + memset(new_node, 0, sizeof (struct evms_list_node)); | |
6615 | + new_node->item = item; | |
6616 | + *list_node = new_node; | |
6617 | + } else { | |
6618 | + rc = -ENOMEM; | |
6619 | + } | |
6620 | + } else { | |
6621 | + rc = 1; | |
6622 | + LOG_DEBUG | |
6623 | + ("warning: attempt to add duplicate item(%p) to list(%p).\n", | |
6624 | + item, list); | |
6625 | + } | |
6626 | + return (rc); | |
6627 | +} | |
6628 | + | |
6629 | +EXPORT_SYMBOL(evms_cs_add_item_to_list); | |
6630 | + | |
6631 | +/* function description: evms_remove_item_from_list | |
6632 | + * | |
6633 | + * this function removes a specified item from the | |
6634 | + * specified list. if the specified item is not | |
6635 | + * found in the list, and error is returned. | |
6636 | + */ | |
6637 | +int | |
6638 | +evms_cs_remove_item_from_list(struct evms_list_node **list, void *item) | |
6639 | +{ | |
6640 | + int rc = 0; | |
6641 | + struct evms_list_node **list_node; | |
6642 | + | |
6643 | + /* check to see if item is in the list */ | |
6644 | + list_node = evms_cs_lookup_item_in_list(list, item); | |
6645 | + | |
6646 | + /* was the node found in the list? */ | |
6647 | + if (*list_node) { | |
6648 | + /* yes, it was found */ | |
6649 | + struct evms_list_node *tmp_node; | |
6650 | + | |
6651 | + /* save ptr to node being removed */ | |
6652 | + tmp_node = *list_node; | |
6653 | + /* remove it from the global list */ | |
6654 | + *list_node = tmp_node->next; | |
6655 | + /* delete removed node */ | |
6656 | + kfree(tmp_node); | |
6657 | + } else { | |
6658 | + /* no, it was not found */ | |
6659 | + rc = -1; | |
6660 | + LOG_ERROR | |
6661 | + ("error(%d): attempt to remove nonexistant node(%p) from list(%p).\n", | |
6662 | + rc, item, list); | |
6663 | + } | |
6664 | + return (rc); | |
6665 | +} | |
6666 | + | |
6667 | +EXPORT_SYMBOL(evms_cs_remove_item_from_list); | |
6668 | + | |
6669 | +/* function description: evms_cs_register_device | |
6670 | + * | |
6671 | + * this function adds a device to the EVMS global device list. | |
6672 | + * | |
6673 | + * RC == 0 is returned for: | |
6674 | + * a successful add of a new device | |
6675 | + * | |
6676 | + * RC == 1 is returned when: | |
6677 | + * the device is already on the list | |
6678 | + * | |
6679 | + * RC < 0 is returned for an error attempting to add the device. | |
6680 | + */ | |
6681 | +int | |
6682 | +evms_cs_register_device(struct evms_logical_node *device) | |
6683 | +{ | |
6684 | + return (evms_cs_add_item_to_list(&evms_global_device_list, device)); | |
6685 | +} | |
6686 | + | |
6687 | +EXPORT_SYMBOL(evms_cs_register_device); | |
6688 | + | |
6689 | +/* function description: evms_cs_unregister_device | |
6690 | + * | |
6691 | + * this function removes a device from the EVMS global device list. | |
6692 | + * | |
6693 | + * RC == 0 is returned for: | |
6694 | + * a successful removal of the specified device | |
6695 | + * | |
6696 | + * RC < 0 is returned for an error attempting to add the device. | |
6697 | + * -ENODATA is returned if specified device is not found. | |
6698 | + */ | |
6699 | +int | |
6700 | +evms_cs_unregister_device(struct evms_logical_node *device) | |
6701 | +{ | |
6702 | + return (evms_cs_remove_item_from_list(&evms_global_device_list, | |
6703 | + device)); | |
6704 | +} | |
6705 | + | |
6706 | +EXPORT_SYMBOL(evms_cs_unregister_device); | |
6707 | +/* device list iterator: in_device == NULL yields the first device, otherwise the device listed after in_device. */ | |
6708 | +static struct evms_list_node *find_first_next_list_node = NULL; | |
6709 | +int | |
6710 | +evms_cs_find_next_device(struct evms_logical_node *in_device, | |
6711 | + struct evms_logical_node **out_device) | |
6712 | +{ | |
6713 | + int rc = 0; | |
6714 | + struct evms_list_node **list_node; | |
6715 | + /* the cursor lives in the file-scope static above; rc becomes -ENODATA when the lookup of in_device yields no node */ | |
6716 | + if (in_device == NULL) | |
6717 | + find_first_next_list_node = evms_global_device_list; | |
6718 | + else { | |
6719 | + list_node = | |
6720 | + evms_cs_lookup_item_in_list(&evms_global_device_list, | |
6721 | + in_device); | |
6722 | + find_first_next_list_node = *list_node; | |
6723 | + if (find_first_next_list_node == NULL) | |
6724 | + rc = -ENODATA; | |
6725 | + else | |
6726 | + find_first_next_list_node = | |
6727 | + find_first_next_list_node->next; | |
6728 | + } | |
6729 | + /* *out_device is set to NULL both at the end of the list and in the -ENODATA case */ | |
6730 | + if (find_first_next_list_node == NULL) | |
6731 | + *out_device = NULL; | |
6732 | + else | |
6733 | + *out_device = (struct evms_logical_node *) | |
6734 | + find_first_next_list_node->item; | |
6735 | + | |
6736 | + return (rc); | |
6737 | +} | |
6738 | + | |
6739 | +EXPORT_SYMBOL(evms_cs_find_next_device); | |
6740 | +/* walk evms_global_notify_list and send each record's signal (signo) to its pid when the record's eventid matches. */ | |
6741 | +void | |
6742 | +evms_cs_signal_event(int eventid) | |
6743 | +{ | |
6744 | + int rc; | |
6745 | + struct evms_list_node **list_node; | |
6746 | + | |
6747 | + /* signal PID(s) of specified event */ | |
6748 | + list_node = &evms_global_notify_list; | |
6749 | + while (*list_node) { | |
6750 | + struct evms_event *event; | |
6751 | + | |
6752 | + event = (*list_node)->item; | |
6753 | + if (event->eventid == eventid) { | |
6754 | + struct task_struct *tsk; | |
6755 | + | |
6756 | + tsk = find_task_by_pid(event->pid); | |
6757 | + if (tsk) { | |
6758 | + struct siginfo siginfo; | |
6759 | + memset(&siginfo, 0, sizeof (siginfo)); /* fields not set below must not carry stale stack bytes */ | |
6760 | + siginfo.si_signo = event->signo; | |
6761 | + siginfo.si_errno = 0; | |
6762 | + siginfo.si_code = 0; | |
6763 | + rc = send_sig_info(event->signo, &siginfo, tsk); | |
6764 | + } else { | |
6765 | + /* TODO: | |
6766 | + * unregister this stale | |
6767 | + * notification record | |
6768 | + */ | |
6769 | + } | |
6770 | + } | |
6771 | + list_node = &(*list_node)->next; | |
6772 | + } | |
6773 | +} | |
6774 | + | |
6775 | +EXPORT_SYMBOL(evms_cs_signal_event); | |
6776 | +/* discard all signals pending for the current task, holding its sigmask_lock around flush_signals(). */ | |
6777 | +static inline void | |
6778 | +evms_flush_signals(void) | |
6779 | +{ | |
6780 | + spin_lock(&current->sigmask_lock); | |
6781 | + flush_signals(current); | |
6782 | + spin_unlock(&current->sigmask_lock); | |
6783 | +} | |
6784 | +/* set the current task's exit signal to SIGCHLD and its blocked set to the inverse of the SIGKILL mask. */ | |
6785 | +static inline void | |
6786 | +evms_init_signals(void) | |
6787 | +{ | |
6788 | + current->exit_signal = SIGCHLD; | |
6789 | + siginitsetinv(&current->blocked, sigmask(SIGKILL)); | |
6790 | +} | |
6791 | +/* kernel thread body: arg is the struct evms_thread; runs thread->run(thread->data) on each wakeup until run is cleared. */ | |
6792 | +static int | |
6793 | +evms_thread(void *arg) | |
6794 | +{ | |
6795 | + struct evms_thread *thread = arg; | |
6796 | + lock_kernel(); | |
6797 | + | |
6798 | + /* | |
6799 | + * Detach thread | |
6800 | + */ | |
6801 | + | |
6802 | + daemonize(); | |
6803 | + /* the name is data, not a format string, and must be bounded by the size of comm[] */ | |
6804 | + snprintf(current->comm, sizeof (current->comm), "%s", (const char *) thread->name); | |
6805 | + evms_init_signals(); | |
6806 | + evms_flush_signals(); | |
6807 | + thread->tsk = current; | |
6808 | + | |
6809 | + current->policy = SCHED_OTHER; | |
6810 | +#ifdef O1_SCHEDULER | |
6811 | + set_user_nice(current, -20); | |
6812 | +#else | |
6813 | + current->nice = -20; | |
6814 | +#endif | |
6815 | + unlock_kernel(); | |
6816 | + /* start-up finished: release evms_cs_register_thread, which waits on this completion */ | |
6817 | + complete(thread->event); | |
6818 | + while (thread->run) { | |
6819 | + void (*run) (void *data); | |
6820 | + DECLARE_WAITQUEUE(wait, current); | |
6821 | + /* sleep on the wait queue unless a wakeup has already been flagged */ | |
6822 | + add_wait_queue(&thread->wqueue, &wait); | |
6823 | +#ifdef O1_SCHEDULER | |
6824 | + set_current_state(TASK_INTERRUPTIBLE); | |
6825 | +#else | |
6826 | + set_task_state(current, TASK_INTERRUPTIBLE); | |
6827 | +#endif | |
6828 | + if (!test_bit(EVMS_THREAD_WAKEUP, &thread->flags)) { | |
6829 | + schedule(); | |
6830 | + } | |
6831 | +#ifdef O1_SCHEDULER | |
6832 | + set_current_state(TASK_RUNNING); | |
6833 | +#else | |
6834 | + current->state = TASK_RUNNING; | |
6835 | +#endif | |
6836 | + remove_wait_queue(&thread->wqueue, &wait); | |
6837 | + clear_bit(EVMS_THREAD_WAKEUP, &thread->flags); | |
6838 | + /* re-read run: evms_cs_unregister_thread sets it to NULL to end this loop */ | |
6839 | + run = thread->run; | |
6840 | + if (run) { | |
6841 | + run(thread->data); | |
6842 | + run_task_queue(&tq_disk); | |
6843 | + } | |
6844 | + if (signal_pending(current)) { | |
6845 | + evms_flush_signals(); | |
6846 | + } | |
6847 | + } | |
6848 | + complete(thread->event); | |
6849 | + return 0; | |
6850 | +} | |
6851 | +/* allocate a zeroed struct evms_thread, start a kernel thread running evms_thread() on it and wait for its start-up completion; NULL on failure. */ | |
6852 | +struct evms_thread * | |
6853 | +evms_cs_register_thread(void (*run) (void *), void *data, const u8 * name) | |
6854 | +{ | |
6855 | + struct evms_thread *thread; | |
6856 | + int ret; | |
6857 | + struct completion event; | |
6858 | + | |
6859 | + thread = kmalloc(sizeof (struct evms_thread), GFP_KERNEL); | |
6860 | + if (!thread) { | |
6861 | + return NULL; | |
6862 | + } | |
6863 | + memset(thread, 0, sizeof (struct evms_thread)); | |
6864 | + init_waitqueue_head(&thread->wqueue); | |
6865 | + /* event is on this stack frame; evms_thread() completes it after its set-up, which is what the wait below relies on */ | |
6866 | + init_completion(&event); | |
6867 | + thread->event = &event; | |
6868 | + thread->run = run; | |
6869 | + thread->data = data; | |
6870 | + thread->name = name; | |
6871 | + ret = kernel_thread(evms_thread, thread, 0); | |
6872 | + if (ret < 0) { | |
6873 | + kfree(thread); | |
6874 | + return NULL; | |
6875 | + } | |
6876 | + wait_for_completion(&event); | |
6877 | + return thread; | |
6878 | +} | |
6879 | + | |
6880 | +EXPORT_SYMBOL(evms_cs_register_thread); | |
6881 | +/* stop the thread behind an evms_thread structure, wait for it to signal completion, then free the structure. */ | |
6882 | +void | |
6883 | +evms_cs_unregister_thread(struct evms_thread *thread) | |
6884 | +{ | |
6885 | + struct completion event; | |
6886 | + | |
6887 | + init_completion(&event); | |
6888 | + /* run = NULL ends the loop in evms_thread(); NOTE(review): if thread->tsk is unset no signal is sent and the wait below may never return - confirm */ | |
6889 | + thread->event = &event; | |
6890 | + thread->run = NULL; | |
6891 | + thread->name = NULL; | |
6892 | + evms_cs_interrupt_thread(thread); | |
6893 | + wait_for_completion(&event); | |
6894 | + kfree(thread); | |
6895 | +} | |
6896 | + | |
6897 | +EXPORT_SYMBOL(evms_cs_unregister_thread); | |
6898 | +/* set the EVMS_THREAD_WAKEUP flag bit (tested by evms_thread() before it sleeps) and wake the thread's wait queue. */ | |
6899 | +void | |
6900 | +evms_cs_wakeup_thread(struct evms_thread *thread) | |
6901 | +{ | |
6902 | + set_bit(EVMS_THREAD_WAKEUP, &thread->flags); | |
6903 | + wake_up(&thread->wqueue); | |
6904 | +} | |
6905 | + | |
6906 | +EXPORT_SYMBOL(evms_cs_wakeup_thread); | |
6907 | +/* send SIGKILL to the task recorded in thread->tsk; logs an error and returns without signalling when no task is recorded. */ | |
6908 | +void | |
6909 | +evms_cs_interrupt_thread(struct evms_thread *thread) | |
6910 | +{ | |
6911 | + if (!thread->tsk) { | |
6912 | + LOG_ERROR("error: attempted to interrupt an invalid thread!\n"); | |
6913 | + return; | |
6914 | + } | |
6915 | + send_sig(SIGKILL, thread->tsk, 1); | |
6916 | +} | |
6917 | + | |
6918 | +EXPORT_SYMBOL(evms_cs_interrupt_thread); | |
6919 | +/* return evms_proc_dir; with CONFIG_PROC_FS the "evms" directory entry is created under proc_root on first use. */ | |
6920 | +struct proc_dir_entry * | |
6921 | +evms_cs_get_evms_proc_dir(void) | |
6922 | +{ | |
6923 | +#ifdef CONFIG_PROC_FS | |
6924 | + if (!evms_proc_dir) { | |
6925 | + evms_proc_dir = create_proc_entry("evms", S_IFDIR, &proc_root); | |
6926 | + } | |
6927 | +#endif | |
6928 | + return (evms_proc_dir); | |
6929 | +} | |
6930 | + | |
6931 | +EXPORT_SYMBOL(evms_cs_get_evms_proc_dir); | |
6932 | +/* adjust (operation > 0: +1, operation < 0: -1, 0: unchanged) and optionally report the volume's requests_in_progress count. */ | |
6933 | +int | |
6934 | +evms_cs_volume_request_in_progress(kdev_t dev, | |
6935 | + int operation, int *current_count) | |
6936 | +{ | |
6937 | + int rc = 0; | |
6938 | + struct evms_logical_volume *volume; | |
6939 | + /* the volume is selected by MINOR(dev); -ENODEV is returned when that minor has no node. current_count may be NULL */ | |
6940 | + volume = &evms_logical_volumes[MINOR(dev)]; | |
6941 | + if (volume->node) { | |
6942 | + if (operation > 0) { | |
6943 | + atomic_inc(&volume->requests_in_progress); | |
6944 | + } else if (operation < 0) { | |
6945 | + atomic_dec(&volume->requests_in_progress); | |
6946 | + } | |
6947 | + if (current_count) { | |
6948 | + *current_count = | |
6949 | + atomic_read(&volume->requests_in_progress); | |
6950 | + } | |
6951 | + } else { | |
6952 | + rc = -ENODEV; | |
6953 | + } | |
6954 | + return (rc); | |
6955 | +} | |
6956 | + | |
6957 | +EXPORT_SYMBOL(evms_cs_volume_request_in_progress); | |
6958 | +/* call invalidate_device() on the first minor (1..MAX_EVMS_VOLUMES-1) whose node name equals node->name; no-op if none matches. */ | |
6959 | +void | |
6960 | +evms_cs_invalidate_volume(struct evms_logical_node *node) | |
6961 | +{ | |
6962 | + int i; | |
6963 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
6964 | + if (evms_logical_volumes[i].node && node->name) { | |
6965 | + if (! | |
6966 | + (strcmp | |
6967 | + (evms_logical_volumes[i].node->name, | |
6968 | + node->name))) { | |
6969 | + LOG_DETAILS | |
6970 | + ("Invalidating EVMS device %s minor %d\n", | |
6971 | + node->name, i); | |
6972 | + invalidate_device(MKDEV(EVMS_MAJOR, i), 0); | |
6973 | + break; | |
6974 | + } | |
6975 | + } | |
6976 | + } | |
6977 | +} | |
6978 | + | |
6979 | +EXPORT_SYMBOL(evms_cs_invalidate_volume); | |
6980 | +/* return the current value of the "opens" counter of the volume with the given minor (non-zero is treated as open by callers). */ | |
6981 | +static int | |
6982 | +is_open(int minor) | |
6983 | +{ | |
6984 | + return atomic_read(&evms_logical_volumes[minor].opens); | |
6985 | +} | |
6986 | + | |
6987 | +/**********************************************************/ | |
6988 | +/* END -- exported functions/Common Services */ | |
6989 | +/**********************************************************/ | |
6990 | + | |
6991 | +/**********************************************************/ | |
6992 | +/* START -- Proc FS Support functions */ | |
6993 | +/**********************************************************/ | |
6994 | + | |
6995 | +#ifdef CONFIG_PROC_FS | |
6996 | +static int | |
6997 | +evms_info_read_proc(char *page, | |
6998 | + char **start, off_t off, int count, int *eof, void *data) | |
6999 | +{ | |
7000 | + int sz = 0; | |
7001 | + char *info_level_text = NULL; | |
7002 | + /* proc read handler: reports the info level (number and name) and the EVMS, ioctl interface and common services versions */ | |
7003 | + PROCPRINT("Enterprise Volume Management System: Info\n"); | |
7004 | + switch (evms_info_level) { | |
7005 | + case EVMS_INFO_CRITICAL: | |
7006 | + info_level_text = "critical"; | |
7007 | + break; | |
7008 | + case EVMS_INFO_SERIOUS: | |
7009 | + info_level_text = "serious"; | |
7010 | + break; | |
7011 | + case EVMS_INFO_ERROR: | |
7012 | + info_level_text = "error"; | |
7013 | + break; | |
7014 | + case EVMS_INFO_WARNING: | |
7015 | + info_level_text = "warning"; | |
7016 | + break; | |
7017 | + case EVMS_INFO_DEFAULT: | |
7018 | + info_level_text = "default"; | |
7019 | + break; | |
7020 | + case EVMS_INFO_DETAILS: | |
7021 | + info_level_text = "details"; | |
7022 | + break; | |
7023 | + case EVMS_INFO_DEBUG: | |
7024 | + info_level_text = "debug"; | |
7025 | + break; | |
7026 | + case EVMS_INFO_EXTRA: | |
7027 | + info_level_text = "extra"; | |
7028 | + break; | |
7029 | + case EVMS_INFO_ENTRY_EXIT: | |
7030 | + info_level_text = "entry exit"; | |
7031 | + break; | |
7032 | + case EVMS_INFO_EVERYTHING: | |
7033 | + info_level_text = "everything"; | |
7034 | + break; | |
7035 | + default: | |
7036 | + info_level_text = "unknown"; | |
7037 | + break; | |
7038 | + } | |
7039 | + PROCPRINT("EVMS info level: %d (%s).\n", | |
7040 | + evms_info_level, info_level_text); | |
7041 | + | |
7042 | + PROCPRINT("EVMS kernel version: %d.%d.%d\n", | |
7043 | + EVMS_MAJOR_VERSION, | |
7044 | + EVMS_MINOR_VERSION, EVMS_PATCHLEVEL_VERSION); | |
7045 | + | |
7046 | + PROCPRINT("EVMS IOCTL interface version: %d.%d.%d\n", | |
7047 | + EVMS_IOCTL_INTERFACE_MAJOR, | |
7048 | + EVMS_IOCTL_INTERFACE_MINOR, EVMS_IOCTL_INTERFACE_PATCHLEVEL); | |
7049 | + | |
7050 | + PROCPRINT("EVMS Common Services version: %d.%d.%d\n", | |
7051 | + EVMS_COMMON_SERVICES_MAJOR, | |
7052 | + EVMS_COMMON_SERVICES_MINOR, EVMS_COMMON_SERVICES_PATCHLEVEL); | |
7053 | + | |
7054 | + *eof = 1; | |
7055 | + /* no goto is visible here: PROCPRINT (defined elsewhere) presumably jumps to out - confirm. the tail returns the bytes at or after off, capped at count */ | |
7056 | +out: | |
7057 | + *start = page + off; | |
7058 | + sz -= off; | |
7059 | + if (sz < 0) | |
7060 | + sz = 0; | |
7061 | + return sz > count ? count : sz; | |
7062 | +} | |
7063 | +/* proc read handler: one line per entry on the registered_plugin_head list (plugin id, version, required services version). */ | |
7064 | +static int | |
7065 | +evms_plugins_read_proc(char *page, | |
7066 | + char **start, off_t off, int count, int *eof, void *data) | |
7067 | +{ | |
7068 | + int sz = 0; | |
7069 | + struct evms_registered_plugin *rp = NULL; | |
7070 | + | |
7071 | + PROCPRINT("Enterprise Volume Management System: Plugins\n"); | |
7072 | + /* 0 1 1 2 2 3 3 4 4 5 5 6 6 7 */ | |
7073 | + /* 1 5 0 5 0 5 0 5 0 5 0 5 0 5 0 */ | |
7074 | + PROCPRINT(" ---------Plugin---------- required services\n"); | |
7075 | + PROCPRINT(" ----id---- version version\n\n"); | |
7076 | + for (rp = registered_plugin_head; rp; rp = rp->next) { | |
7077 | + PROCPRINT(" %x.%x.%x\t %d.%d.%d\t%d.%d.%d\n", | |
7078 | + GetPluginOEM(rp->plugin->id), | |
7079 | + GetPluginType(rp->plugin->id), | |
7080 | + GetPluginID(rp->plugin->id), | |
7081 | + rp->plugin->version.major, | |
7082 | + rp->plugin->version.minor, | |
7083 | + rp->plugin->version.patchlevel, | |
7084 | + rp->plugin->required_services_version.major, | |
7085 | + rp->plugin->required_services_version.minor, | |
7086 | + rp->plugin->required_services_version.patchlevel); | |
7087 | + } | |
7088 | + /* NOTE(review): *eof is not set here, unlike evms_info_read_proc - confirm intended. returns the bytes at or after off, capped at count */ | |
7089 | +out: | |
7090 | + *start = page + off; | |
7091 | + sz -= off; | |
7092 | + if (sz < 0) | |
7093 | + sz = 0; | |
7094 | + return sz > count ? count : sz; | |
7095 | +} | |
7096 | +/* proc read handler: one line per minor in 1..MAX_EVMS_VOLUMES-1 that has a node (size, type, ro/rw, partial flag, device path). */ | |
7097 | +static int | |
7098 | +evms_volumes_read_proc(char *page, | |
7099 | + char **start, off_t off, int count, int *eof, void *data) | |
7100 | +{ | |
7101 | + int sz = 0, j; | |
7102 | + /* #blocks is total_vsectors shifted right by one - presumably 512-byte sectors shown as 1K blocks; confirm */ | |
7103 | + PROCPRINT("Enterprise Volume Management System: Volumes\n"); | |
7104 | + PROCPRINT("major minor #blocks type flags name\n\n"); | |
7105 | + for (j = 1; j < MAX_EVMS_VOLUMES; j++) { | |
7106 | + struct evms_logical_volume *volume; | |
7107 | + | |
7108 | + volume = &evms_logical_volumes[j]; | |
7109 | + if (volume->node) { | |
7110 | + PROCPRINT("%5d %7d %16Ld %s %s %s %s%s\n", | |
7111 | + EVMS_MAJOR, j, | |
7112 | + (long long)volume->node->total_vsectors >> 1, | |
7113 | + (volume-> | |
7114 | + flags & EVMS_VOLUME_FLAG) ? "evms " : | |
7115 | + "compat", | |
7116 | + (volume-> | |
7117 | + flags & EVMS_VOLUME_READ_ONLY) ? "ro" : "rw", | |
7118 | + (volume-> | |
7119 | + flags & EVMS_VOLUME_PARTIAL) ? "p " : " ", | |
7120 | + EVMS_DEV_NODE_PATH, volume->name); | |
7121 | + } | |
7122 | + } | |
7123 | +out: | |
7124 | + *start = page + off; | |
7125 | + sz -= off; | |
7126 | + if (sz < 0) | |
7127 | + sz = 0; | |
7128 | + return sz > count ? count : sz; | |
7129 | + | |
7130 | +} | |
7131 | +#endif | |
7132 | + | |
7133 | +/**********************************************************/ | |
7134 | +/* END -- Proc FS Support functions */ | |
7135 | +/**********************************************************/ | |
7136 | + | |
7137 | +/**********************************************************/ | |
7138 | +/* START -- FOPS functions definitions */ | |
7139 | +/**********************************************************/ | |
7140 | + | |
7141 | +/************************************************/ | |
7142 | +/* START -- IOCTL commands -- EVMS specific */ | |
7143 | +/************************************************/ | |
7144 | +/* copy the EVMS_IOCTL_INTERFACE_* version triple to the user buffer at arg; returns 0, or -EFAULT if the copy fails. */ | |
7145 | +static int | |
7146 | +evms_ioctl_cmd_get_ioctl_version(void *arg) | |
7147 | +{ | |
7148 | + int rc = 0; | |
7149 | + struct evms_version ver; | |
7150 | + | |
7151 | + ver.major = EVMS_IOCTL_INTERFACE_MAJOR; | |
7152 | + ver.minor = EVMS_IOCTL_INTERFACE_MINOR; | |
7153 | + ver.patchlevel = EVMS_IOCTL_INTERFACE_PATCHLEVEL; | |
7154 | + | |
7155 | + /* copy info to userspace */ | |
7156 | + if (copy_to_user(arg, &ver, sizeof (ver))) | |
7157 | + rc = -EFAULT; | |
7158 | + | |
7159 | + return (rc); | |
7160 | +} | |
7161 | +/* copy the EVMS version triple (major, minor, patchlevel) to the user buffer at arg; returns 0, or -EFAULT if the copy fails. */ | |
7162 | +static int | |
7163 | +evms_ioctl_cmd_get_version(void *arg) | |
7164 | +{ | |
7165 | + int rc = 0; | |
7166 | + struct evms_version ver; | |
7167 | + | |
7168 | + ver.major = EVMS_MAJOR_VERSION; | |
7169 | + ver.minor = EVMS_MINOR_VERSION; | |
7170 | + ver.patchlevel = EVMS_PATCHLEVEL_VERSION; | |
7171 | + | |
7172 | + /* copy info to userspace */ | |
7173 | + if (copy_to_user(arg, &ver, sizeof (ver))) | |
7174 | + rc = -EFAULT; | |
7175 | + | |
7176 | + return (rc); | |
7177 | +} | |
7178 | +/* copy the current value of evms_info_level to the user buffer at arg; returns 0, or -EFAULT if the copy fails. */ | |
7179 | +static int | |
7180 | +evms_ioctl_cmd_get_info_level(void *arg) | |
7181 | +{ | |
7182 | + int rc = 0; | |
7183 | + | |
7184 | + /* copy info to userspace */ | |
7185 | + if (copy_to_user(arg, &evms_info_level, sizeof (evms_info_level))) | |
7186 | + rc = -EFAULT; | |
7187 | + | |
7188 | + return (rc); | |
7189 | +} | |
7190 | +/* read an int from the user buffer at arg and store it in evms_info_level; on -EFAULT the level is left unchanged. */ | |
7191 | +static int | |
7192 | +evms_ioctl_cmd_set_info_level(void *arg) | |
7193 | +{ | |
7194 | + int temp, rc = 0; | |
7195 | + | |
7196 | + /* copy info from userspace */ | |
7197 | + if (copy_from_user(&temp, arg, sizeof (temp))) | |
7198 | + rc = -EFAULT; | |
7199 | + else | |
7200 | + evms_info_level = temp; | |
7201 | + | |
7202 | + return (rc); | |
7203 | +} | |
7204 | + | |
7205 | +/* function: evms_quiesce_volume | |
7206 | + * | |
7207 | + * this function performs the actual quiesce operation on | |
7208 | + * a volume in kernel memory. | |
7209 | + * | |
7210 | + * when quiescing, all new I/Os to a volume are stopped, | |
7211 | + * causing the calling thread to block. this thread then | |
7212 | + * waits until all I/Os in progress are completed, before | |
7213 | + * returning control to the caller. | |
7214 | + * | |
7215 | + * when unquiescing, all new I/Os are allowed to proceed | |
7216 | + * unencumbered, and all threads waiting (blocked) on this | |
7217 | + * volume, are woken up and allowed to proceed. | |
7218 | + * the return code is that of the EVMS_QUIESCE_VOLUME ioctl sent down the stack. | |
7219 | + */ | |
7220 | +static int | |
7221 | +evms_quiesce_volume(struct evms_logical_volume *volume, | |
7222 | + struct inode *inode, | |
7223 | + struct file *file, struct evms_quiesce_vol_pkt *qv) | |
7224 | +{ | |
7225 | + int rc; | |
7226 | + | |
7227 | + LOG_DEBUG("%squiescing %s.\n", | |
7228 | + ((qv->command) ? "" : "un"), volume->name); | |
7229 | + | |
7230 | +#ifdef VFS_PATCH_PRESENT | |
7231 | + if (qv->do_vfs) { | |
7232 | + /* VFS function call to sync and lock the filesystem */ | |
7233 | + fsync_dev_lockfs(MKDEV(EVMS_MAJOR, qv->minor)); | |
7234 | + volume->vfs_quiesced = TRUE; | |
7235 | + } | |
7236 | +#endif | |
7237 | + volume->quiesced = qv->command; | |
7238 | + | |
7239 | + /* Command specified was "quiesce". */ | |
7240 | + if (qv->command) { | |
7241 | + /* After setting the volume to | |
7242 | + * a quiesced state, there could | |
7243 | + * be threads (on SMP systems) | |
7244 | + * that are executing in the | |
7245 | + * function, evms_handle_request, | |
7246 | + * between the "wait_event" and the | |
7247 | + * "atomic_inc" lines. We need to | |
7248 | + * provide a "delay" sufficient | |
7249 | + * to allow those threads to | |
7250 | + * reach the atomic_inc's | |
7251 | + * before executing the while loop | |
7252 | + * below. The "schedule" call should | |
7253 | + * provide this. | |
7254 | + */ | |
7255 | + schedule(); | |
7256 | + /* wait for outstanding requests | |
7257 | + * to complete | |
7258 | + */ | |
7259 | + while (atomic_read(&volume->requests_in_progress) > 0) | |
7260 | + schedule(); | |
7261 | + } | |
7262 | + /* send this command down the stack so lower */ | |
7263 | + /* layers can know about this */ | |
7264 | + rc = IOCTL(volume->node, inode, file, | |
7265 | + EVMS_QUIESCE_VOLUME, (unsigned long) qv); | |
7266 | + if (!rc) { | |
7267 | + /* Command specified was "unquiesce". */ | |
7268 | + if (!qv->command) { | |
7269 | + /* "wakeup" any I/O requests waiting on | |
7270 | + * this volume. | |
7271 | + */ | |
7272 | + if (waitqueue_active(&volume->wait_queue)) | |
7273 | + wake_up(&volume->wait_queue); | |
7274 | +#ifdef VFS_PATCH_PRESENT | |
7275 | + if (volume->vfs_quiesced) { | |
7276 | + /* VFS function call to unlock the filesystem */ | |
7277 | + unlockfs(MKDEV(EVMS_MAJOR, qv->minor)); | |
7278 | + volume->vfs_quiesced = FALSE; | |
7279 | + } | |
7280 | +#endif | |
7281 | + } | |
7282 | + } else { | |
7283 | + LOG_ERROR("error(%d) %squiescing %s.\n", | |
7284 | + rc, ((qv->command) ? "" : "un"), volume->name); | |
7285 | + } | |
7286 | + return (rc); | |
7287 | +} | |
7288 | + | |
7289 | +/* function: evms_delete_volume | |
7290 | + * | |
7291 | + * this function performs the actual delete operation on | |
7292 | + * a volume to purge it from kernel memory. all structures | |
7293 | + * and memory consumed by this volume will be freed as well | |
7294 | + * as clearing or unregistering any system services or | |
7295 | + * global data arrays. | |
7296 | + * | |
7297 | + * NOTE: a "permanent" delete (dv->command non-zero) of a volume | |
7298 | + * that is still open returns -EBUSY. otherwise the return | |
7299 | + * code is that of DELETE() on the volume's node. | |
7300 | + */ | |
7301 | +static int | |
7302 | +evms_delete_volume(struct evms_logical_volume *volume, | |
7303 | + struct evms_delete_vol_pkt *dv) | |
7304 | +{ | |
7305 | + int rc = 0; | |
7306 | + | |
7307 | + /* if this is a "permanent" delete */ | |
7308 | + /* check to make sure volume is not mounted */ | |
7309 | + if (dv->command) { | |
7310 | + if (is_open(dv->minor)) { | |
7311 | + rc = -EBUSY; | |
7312 | + } else { | |
7313 | + // invalidate the device since it is not coming back | |
7314 | + // this is required in case we are re-using the minor number | |
7315 | + invalidate_device(MKDEV(EVMS_MAJOR, dv->minor), 1); | |
7316 | + } | |
7317 | + } | |
7318 | + | |
7319 | + /* invoke the delete ioctl at the top of the feature stack */ | |
7320 | + if (!rc) { | |
7321 | + LOG_DETAILS("deleting '%s'.\n", volume->name); | |
7322 | + rc = DELETE(volume->node); | |
7323 | + } | |
7324 | + | |
7325 | + /* the volume has been deleted, do any clean up work | |
7326 | + * required. | |
7327 | + */ | |
7328 | + if (!rc) { | |
7329 | + devfs_unregister(volume->devfs_handle); | |
7330 | + if (dv->command) { | |
7331 | + /* if "permanent" delete, free the name | |
7332 | + * and NULL the name field. | |
7333 | + */ | |
7334 | + kfree(volume->name); | |
7335 | + volume->name = NULL; | |
7336 | + volume->flags = 0; | |
7337 | + } else { | |
7338 | + /* if "soft" delete, leave the name so | |
7339 | + * we can use it to reassign the same | |
7340 | + * minor to this volume after a | |
7341 | + * rediscovery. | |
7342 | + */ | |
7343 | + volume->flags = EVMS_VOLUME_SOFT_DELETED; | |
7344 | + } | |
7345 | + volume->node = NULL; | |
7346 | + set_device_ro(MKDEV(EVMS_MAJOR, dv->minor), 0); | |
7347 | + blk_size[EVMS_MAJOR][dv->minor] = 0; | |
7348 | + blksize_size[EVMS_MAJOR][dv->minor] = 0; | |
7349 | + hardsect_size[EVMS_MAJOR][dv->minor] = 0; | |
7350 | + evms_volumes--; | |
7351 | + } else { | |
7352 | + LOG_ERROR("error(%d) %s deleting %s.\n", | |
7353 | + rc, ((dv->command) ? "hard" : "soft"), volume->name); | |
7354 | + } | |
7355 | + return (rc); | |
7356 | +} | |
7357 | + | |
7358 | +/* function: evms_user_delete_volume | |
7359 | + * | |
7360 | + * this function, depending on the parameters, performs | |
7361 | + * a "soft" or a "hard" delete. for a "soft" delete, a | |
7362 | + * quiesce & delete request is queued up, to be executed | |
7363 | + * at the beginning of the next rediscovery. for a | |
7364 | + * "hard" delete, the target volume is quiesced and then | |
7365 | + * deleted. if there are any errors attempting to delete | |
7366 | + * the target, then the target is unquiesced. if an | |
7367 | + * associative volume is specified it is quiesced before | |
7368 | + * the target volume is quiesced, and is unquiesced | |
7369 | + * after the attempt to delete the target volume. | |
7370 | + * returns 0, or the first error from the quiesce or delete steps. | |
7371 | + */ | |
7372 | +static int | |
7373 | +evms_user_delete_volume(struct evms_logical_volume *lvt, | |
7374 | + struct inode *inode, | |
7375 | + struct file *file, struct evms_delete_vol_pkt *dv) | |
7376 | +{ | |
7377 | + int rc = 0; | |
7378 | + | |
7379 | + if (!dv->command) { | |
7380 | + /* "soft delete" requested */ | |
7381 | + lvt->flags |= (EVMS_REQUESTED_QUIESCE | EVMS_REQUESTED_DELETE); | |
7382 | + if (dv->do_vfs) { | |
7383 | + lvt->flags |= EVMS_REQUESTED_VFS_QUIESCE; | |
7384 | + } | |
7385 | + } else { | |
7386 | + /* "hard delete" requested */ | |
7387 | + int qa = FALSE; | |
7388 | + struct evms_quiesce_vol_pkt qv; | |
7389 | + struct evms_logical_volume *lva = NULL; | |
7390 | + | |
7391 | + if (dv->associative_minor) { | |
7392 | + /* associative volume specified | |
7393 | + * | |
7394 | + * quiesce it | |
7395 | + */ | |
7396 | + lva = &evms_logical_volumes[dv->associative_minor]; | |
7397 | + /* quiesce associative volume */ | |
7398 | + qv.command = EVMS_QUIESCE; | |
7399 | + qv.do_vfs = EVMS_VFS_DO_NOTHING; | |
7400 | + qv.minor = dv->associative_minor; | |
7401 | + rc = evms_quiesce_volume(lva, inode, file, &qv); | |
7402 | + qa = (rc) ? FALSE : TRUE; | |
7403 | + } | |
7404 | + if (!rc) { | |
7405 | + /* quiesce target volume */ | |
7406 | + qv.command = EVMS_QUIESCE; | |
7407 | + qv.do_vfs = EVMS_VFS_DO_NOTHING; | |
7408 | + qv.minor = dv->minor; | |
7409 | + rc = evms_quiesce_volume(lvt, inode, file, &qv); | |
7410 | + } | |
7411 | + if (!rc) { | |
7412 | + /* delete the target volume */ | |
7413 | + rc = evms_delete_volume(lvt, dv); | |
7414 | + if (rc) { | |
7415 | + /* got an error deleting... | |
7416 | + * | |
7417 | + * unquiesce the target | |
7418 | + */ | |
7419 | + qv.command = EVMS_UNQUIESCE; | |
7420 | + qv.do_vfs = EVMS_VFS_DO_NOTHING; | |
7421 | + qv.minor = dv->minor; | |
7422 | + evms_quiesce_volume(lvt, inode, file, &qv); | |
7423 | + } | |
7424 | + } | |
7425 | + if (dv->associative_minor) { | |
7426 | + /* associative volume specified | |
7427 | + * | |
7428 | + * unquiesce it | |
7429 | + */ | |
7430 | + if (qa) { | |
7431 | + /* only unquiesce associative | |
7432 | + * if we successfully quiesced | |
7433 | + * it previously. | |
7434 | + */ | |
7435 | + qv.command = EVMS_UNQUIESCE; | |
7436 | + qv.do_vfs = EVMS_VFS_DO_NOTHING; | |
7437 | + qv.minor = dv->associative_minor; | |
7438 | + evms_quiesce_volume(lva, inode, file, &qv); | |
7439 | + } | |
7440 | + } | |
7441 | + } | |
7442 | + return (rc); | |
7443 | +} | |
7444 | + | |
7445 | +/* function: evms_ioctl_cmd_delete_volume | |
7446 | + * | |
7447 | + * this function copies the user's delete packet into the kernel, | |
7448 | + * checks that both minors lie inside evms_logical_volumes[] and | |
7449 | + * are in use, and then passes control to the worker routine | |
7450 | + * evms_user_delete_volume. the status is copied back to the user. | |
7451 | + */ | |
7452 | +static int | |
7453 | +evms_ioctl_cmd_delete_volume(struct inode *inode, | |
7454 | + struct file *file, unsigned long arg) | |
7455 | +{ | |
7456 | + int rc = 0; | |
7457 | + struct evms_delete_vol_pkt tmp, *user_parms; | |
7458 | + struct evms_logical_volume *volume = NULL; | |
7459 | + | |
7460 | + user_parms = (struct evms_delete_vol_pkt *) arg; | |
7461 | + /* an unreadable packet leaves tmp uninitialised: do not use it or copy it back */ | |
7462 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
7463 | + return (-EFAULT); | |
7464 | + | |
7465 | + /* the minors come from userspace and index evms_logical_volumes[] */ | |
7466 | + if ((unsigned long) tmp.minor >= MAX_EVMS_VOLUMES || | |
7467 | + (unsigned long) tmp.associative_minor >= MAX_EVMS_VOLUMES) | |
7468 | + rc = -ENXIO; | |
7469 | + /* check to make sure associative minor is in use */ | |
7470 | + if (!rc && tmp.associative_minor && | |
7471 | + evms_logical_volumes[tmp.associative_minor].node == NULL) | |
7472 | + rc = -ENXIO; | |
7473 | + /* check to make sure target minor is in use */ | |
7474 | + if (!rc) { | |
7475 | + volume = &evms_logical_volumes[tmp.minor]; | |
7476 | + if (volume->node == NULL) | |
7477 | + rc = -ENXIO; | |
7478 | + else | |
7479 | + rc = evms_user_delete_volume(volume, inode, file, &tmp); | |
7480 | + } | |
7481 | + /* copy the status value back to the user */ | |
7482 | + tmp.status = rc; | |
7483 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7484 | + rc = -EFAULT; | |
7485 | + | |
7486 | + return (rc); | |
7487 | +} | |
7488 | + | |
7489 | +/* function: evms_full_rediscover_prep | |
7490 | + * | |
7491 | + * this function helps to prevent problems when evms is | |
7492 | + * configured with the base built in statically and some | |
7493 | + * plugins built as modules. | |
7494 | + * | |
7495 | + * in these cases, when the initial discovery is done, | |
7496 | + * only the statically built modules are available for | |
7497 | + * volume construction. as a result, some volumes that | |
7498 | + * require the plugins built as modules (which haven't | |
7499 | + * been loaded), to be fully reconstructed, may come up | |
7500 | + * as compatibility volumes or partial volumes. | |
7501 | + * | |
7502 | + * when parts of evms are built as modules, the | |
7503 | + * evms_rediscover_pkty utility is used, to perform a secondary | |
7504 | + * rediscover, after all the plugins built as modules | |
7505 | + * have been loaded, to construct all the volumes | |
7506 | + * requiring these plugins. | |
7507 | + * | |
7508 | + * however since some of the volumes, requiring the plugins | |
7509 | + * built as modules, may have been already exported as | |
7510 | + * compatibility or partial volumes, we need to purge these | |
7511 | + * volumes from kernel's memory, so that they can be rediscovered | |
7512 | + * and claimed by the appropriate plugins, and reconstructed | |
7513 | + * into the correct volumes. | |
7514 | + * | |
7515 | + * this function purges all compatibility volumes that are | |
7516 | + * not in use(mounted) and all partial volumes, prior to | |
7517 | + * doing the secondary rediscover, thus allowing volumes to | |
7518 | + * be rediscovered correctly. | |
7519 | + * | |
7520 | + * NOTE: again, this is only required in cases when a | |
7521 | + * combination of plugins are built statically and as | |
7522 | + * modules. | |
7523 | + * | |
7524 | + */ | |
7525 | +static void | |
7526 | +evms_full_rediscover_prep(struct inode *inode, struct file *file) | |
7527 | +{ | |
7528 | + int rc = 0, i; | |
7529 | + | |
7530 | + LOG_DETAILS("%s: started.\n", __FUNCTION__); | |
7531 | + /* check for acceptable volumes to be deleted */ | |
7532 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
7533 | + struct evms_logical_volume *volume = NULL; | |
7534 | + struct evms_delete_vol_pkt dv; | |
7535 | + int volume_open, doit; | |
7536 | + | |
7537 | + volume = &evms_logical_volumes[i]; | |
7538 | + if (!volume->node) | |
7539 | + continue; | |
7540 | + volume_open = is_open(i); | |
7541 | + /* only proceed on volumes that are: | |
7542 | + * partial volumes | |
7543 | + * OR | |
7544 | + * unopened compatibility volumes | |
7545 | + */ | |
7546 | + doit = FALSE; | |
7547 | + if (volume->flags & EVMS_VOLUME_PARTIAL) { | |
7548 | + /* do all partial volumes | |
7549 | + */ | |
7550 | + doit = TRUE; | |
7551 | + } else if (!(volume->flags & EVMS_VOLUME_FLAG)) { | |
7552 | + /* check all compatibility volumes | |
7553 | + */ | |
7554 | + if (!volume_open && !is_swap_partition(MKDEV(EVMS_MAJOR, i))) { | |
7555 | + /* only do unopened volumes | |
7556 | + */ | |
7557 | + doit = TRUE; | |
7558 | + } | |
7559 | + } | |
7560 | + if (doit == FALSE) { | |
7561 | + continue; | |
7562 | + } | |
7563 | + /* delete the volume from memory. | |
7564 | + * do a 'soft' delete if volume | |
7565 | + * is mounted, and 'hard' delete | |
7566 | + * if it is not. | |
7567 | + * | |
7568 | + * NOTE: the delete operation will | |
7569 | + * clear the bits in the flags field. | |
7570 | + */ | |
7571 | + dv.command = (volume_open) ? | |
7572 | + EVMS_SOFT_DELETE : EVMS_HARD_DELETE; | |
7573 | + dv.minor = i; | |
7574 | + dv.associative_minor = 0; | |
7575 | + dv.status = 0; | |
7576 | + rc = evms_user_delete_volume(volume, inode, file, &dv); | |
7577 | + } | |
7578 | + LOG_DETAILS("%s: completed.\n", __FUNCTION__); | |
7579 | +} | |
7580 | + /* EVMS_REDISCOVER_VOLUMES handler: run queued quiesce and soft-delete requests, then rediscover volumes on the caller's drive list (or all devices). Returns 0 or a negative errno, also written to user_parms->status. */ | |
7581 | +static int | |
7582 | +evms_ioctl_cmd_rediscover_volumes(struct inode *inode, | |
7583 | + struct file *file, | |
7584 | + unsigned int cmd, unsigned long arg) | |
7585 | +{ | |
7586 | + int rc, i; | |
7587 | + struct evms_rediscover_pkt tmp, *user_parms; | |
7588 | + u64 *array_ptr = NULL, *kernel_array = NULL; /* user's drive array / our kernel copy of it */ | |
7589 | + ulong array_size = 0; | |
7590 | + struct evms_logical_volume *volume = NULL; | |
7591 | + | |
7592 | + rc = tmp.drive_count = 0; | |
7593 | + user_parms = (struct evms_rediscover_pkt *) arg; | |
7594 | + /* copy user's parameters to kernel space; bail out at once if unreadable, since tmp would be undefined */ | |
7595 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
7596 | + return -EFAULT; | |
7597 | + | |
7598 | + if (tmp.drive_count == REDISCOVER_ALL_DEVICES) { | |
7599 | + evms_full_rediscover_prep(inode, file); | |
7600 | + } | |
7601 | + /* quiesce all queued volumes */ | |
7602 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
7603 | + struct evms_quiesce_vol_pkt qv; | |
7604 | + | |
7605 | + volume = &evms_logical_volumes[i]; | |
7606 | + if (!volume->node) { | |
7607 | + continue; | |
7608 | + } | |
7609 | + if (!(volume->flags & EVMS_REQUESTED_QUIESCE)) { | |
7610 | + continue; | |
7611 | + } | |
7612 | + qv.command = EVMS_QUIESCE; | |
7613 | + qv.minor = i; | |
7614 | + qv.do_vfs = (volume->flags & EVMS_REQUESTED_VFS_QUIESCE) ? | |
7615 | + EVMS_VFS_DO : EVMS_VFS_DO_NOTHING, qv.status = 0; | |
7616 | + rc = evms_quiesce_volume(volume, inode, file, &qv); /* NOTE(review): rc keeps only the last volume's result */ | |
7617 | + } | |
7618 | + /* "soft" delete all queued volumes */ | |
7619 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
7620 | + struct evms_delete_vol_pkt dv; | |
7621 | + | |
7622 | + volume = &evms_logical_volumes[i]; | |
7623 | + if (!volume->node) { | |
7624 | + continue; | |
7625 | + } | |
7626 | + if (!(volume->flags & EVMS_REQUESTED_DELETE)) { | |
7627 | + continue; | |
7628 | + } | |
7629 | + dv.command = EVMS_SOFT_DELETE; | |
7630 | + dv.minor = i; | |
7631 | + dv.associative_minor = 0; | |
7632 | + dv.status = 0; | |
7633 | + rc = evms_delete_volume(volume, &dv); | |
7634 | + } | |
7635 | + | |
7636 | + if (!rc && tmp.drive_count && (tmp.drive_count != REDISCOVER_ALL_DEVICES)) { | |
7637 | + /* reject counts whose byte size would overflow array_size */ | |
7638 | + if (tmp.drive_count > (~0UL) / sizeof (*tmp.drive_array)) | |
7639 | + rc = -EINVAL; | |
7640 | + if (!rc) { | |
7641 | + /* create space for userspace drive array */ | |
7642 | + array_size = sizeof (*tmp.drive_array) * tmp.drive_count; | |
7643 | + array_ptr = tmp.drive_array; | |
7644 | + kernel_array = kmalloc(array_size, GFP_KERNEL); | |
7645 | + if (!kernel_array) | |
7646 | + rc = -ENOMEM; | |
7647 | + } | |
7648 | + /* copy rediscover drive array to kernel space */ | |
7649 | + if (!rc && copy_from_user(kernel_array, array_ptr, array_size)) | |
7650 | + rc = -EFAULT; | |
7651 | + tmp.drive_array = kernel_array; /* discovery must only ever see the kernel copy */ | |
7652 | + } | |
7653 | + | |
7654 | + if (!rc) { | |
7655 | + static int evms_discover_volumes(struct evms_rediscover_pkt *); | |
7656 | + /* perform the rediscovery operation */ | |
7657 | + rc = evms_discover_volumes(&tmp); | |
7658 | + } | |
7659 | + | |
7660 | + /* clean up after operation: free only what we allocated, never the user's pointer */ | |
7661 | + if (kernel_array) | |
7662 | + kfree(kernel_array); | |
7663 | + | |
7664 | + /* set return code and copy info to userspace */ | |
7665 | + tmp.status = rc; | |
7666 | + if (copy_to_user(&user_parms->status, &tmp.status, sizeof (tmp.status))) | |
7667 | + rc = -EFAULT; | |
7668 | + | |
7669 | + return (rc); | |
7670 | +} | |
7671 | + /* Cursor shared by the logical-disk ioctls: the evms_global_device_list entry last handed to user space (NULL once the list is exhausted). */ | |
7672 | +static struct evms_list_node *user_disk_ptr; | |
7673 | +static int | |
7674 | +evms_ioctl_cmd_get_logical_disk(void *arg) /* EVMS_GET_LOGICAL_DISK: return one disk handle per call; 0 or -EFAULT */ | |
7675 | +{ | |
7676 | + int rc = 0; | |
7677 | + struct evms_user_disk_pkt tmp, *user_parms; | |
7678 | + memset(&tmp, 0, sizeof (tmp)); /* the whole packet is copied out below; never expose stale stack bytes */ | |
7679 | + user_parms = (struct evms_user_disk_pkt *) arg; | |
7680 | + /* copy user's parameters to kernel space (only the command field is read) */ | |
7681 | + if (copy_from_user | |
7682 | + (&tmp.command, &user_parms->command, sizeof (tmp.command))) | |
7683 | + rc = -EFAULT; | |
7684 | + | |
7685 | + if (!rc) { | |
7686 | + if (tmp.command == EVMS_FIRST_DISK) | |
7687 | + user_disk_ptr = evms_global_device_list; | |
7688 | + else if (user_disk_ptr) /* tmp.command == EVMS_NEXT_DISK; a NULL cursor stays NULL instead of being dereferenced */ | |
7689 | + user_disk_ptr = user_disk_ptr->next; | |
7690 | + | |
7691 | + if (user_disk_ptr == NULL) | |
7692 | + tmp.status = EVMS_DISK_INVALID; | |
7693 | + else { | |
7694 | + tmp.status = EVMS_DISK_VALID; | |
7695 | + tmp.disk_handle = | |
7696 | + NODE_TO_DEV_HANDLE(user_disk_ptr->item); | |
7697 | + } | |
7698 | + /* copy info to userspace */ | |
7699 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7700 | + rc = -EFAULT; | |
7701 | + } | |
7702 | + return (rc); | |
7703 | +} | |
7704 | + /* EVMS_GET_LOGICAL_DISK_INFO handler: validate the caller's disk handle against evms_global_device_list and report name, flags, size and geometry. Returns 0 or a negative errno, mirrored in tmp.status. */ | |
7705 | +static int | |
7706 | +evms_ioctl_cmd_get_logical_disk_info(void *arg) | |
7707 | +{ | |
7708 | + int rc = 0; | |
7709 | + struct evms_user_disk_info_pkt tmp, *user_parms; | |
7710 | + struct evms_list_node *p; | |
7711 | + struct evms_logical_node *disk_node = NULL; | |
7712 | + memset(&tmp, 0, sizeof (tmp)); /* only disk_handle is read in, yet the whole packet is copied out: start from zeroes */ | |
7713 | + user_parms = (struct evms_user_disk_info_pkt *) arg; | |
7714 | + /* copy user's parameters to kernel space */ | |
7715 | + if (copy_from_user | |
7716 | + (&tmp.disk_handle, &user_parms->disk_handle, | |
7717 | + sizeof (tmp.disk_handle))) | |
7718 | + rc = -EFAULT; | |
7719 | + | |
7720 | + /* check handle for validity */ | |
7721 | + if (!rc) { | |
7722 | + rc = -EINVAL; /* stays -EINVAL unless the handle is found on the device list */ | |
7723 | + disk_node = DEV_HANDLE_TO_NODE(tmp.disk_handle); | |
7724 | + for (p = evms_global_device_list; p; p = p->next) | |
7725 | + if (p->item == disk_node) { | |
7726 | + rc = 0; | |
7727 | + user_disk_ptr = p; /* side effect: moves the shared disk cursor to this entry */ | |
7728 | + break; | |
7729 | + } | |
7730 | + } | |
7731 | + | |
7732 | + /* populate kernel copy of user's structure with appropriate info */ | |
7733 | + if (!rc) { | |
7734 | + struct hd_geometry geo; | |
7735 | + struct evms_logical_node *node = | |
7736 | + (struct evms_logical_node *) user_disk_ptr->item; | |
7737 | + tmp.flags = node->flags; | |
7738 | + strcpy(tmp.disk_name, EVMS_DEV_NODE_PATH); | |
7739 | + strcat(tmp.disk_name, node->name); /* NOTE(review): unbounded; assumes disk_name can hold path + node name - confirm */ | |
7740 | + rc = evms_cs_kernel_ioctl(node, EVMS_UPDATE_DEVICE_INFO, | |
7741 | + (ulong) NULL); | |
7742 | + if (!rc) { | |
7743 | + tmp.total_sectors = node->total_vsectors; | |
7744 | + tmp.hardsect_size = node->hardsector_size; | |
7745 | + tmp.block_size = node->block_size; | |
7746 | + rc = evms_cs_kernel_ioctl(node, HDIO_GETGEO, | |
7747 | + (unsigned long) &geo); | |
7748 | + } | |
7749 | + if (!rc) { | |
7750 | + tmp.geo_sectors = geo.sectors; | |
7751 | + tmp.geo_heads = geo.heads; | |
7752 | + tmp.geo_cylinders = geo.cylinders; | |
7753 | + } | |
7754 | + } | |
7755 | + | |
7756 | + /* set return code and copy info to userspace */ | |
7757 | + tmp.status = rc; | |
7758 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7759 | + rc = -EFAULT; | |
7760 | + | |
7761 | + return (rc); | |
7762 | +} | |
7763 | + /* EVMS_SECTOR_IO handler: transfer tmp.sector_count sectors between a user buffer and the disk named by tmp.disk_handle, in chunks bounced through a kernel buffer. Returns 0 or a negative errno, mirrored in tmp.status. */ | |
7764 | +static int | |
7765 | +evms_ioctl_cmd_sector_io(void *arg) | |
7766 | +{ | |
7767 | + int rc; | |
7768 | +#define MAX_IO_SIZE 128 | |
7769 | + u64 io_size, max_io_size = MAX_IO_SIZE; /* both counted in sectors */ | |
7770 | +#undef MAX_IO_SIZE | |
7771 | + struct evms_sector_io_pkt tmp, *user_parms; | |
7772 | + struct evms_logical_node *disk_node = NULL; | |
7773 | + struct evms_list_node *list_node; | |
7774 | + unsigned char *io_buffer; | |
7775 | + | |
7776 | + rc = 0; | |
7777 | + list_node = NULL; | |
7778 | + io_buffer = NULL; | |
7779 | + | |
7780 | + user_parms = (struct evms_sector_io_pkt *) arg; | |
7781 | + /* copy user's parameters to kernel space */ | |
7782 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
7783 | + rc = -EFAULT; | |
7784 | + | |
7785 | + /* check handle for validity */ | |
7786 | + if (!rc) { | |
7787 | + rc = -EINVAL; /* stays -EINVAL unless the handle is found on the device list */ | |
7788 | + disk_node = DEV_HANDLE_TO_NODE(tmp.disk_handle); | |
7789 | + for (list_node = evms_global_device_list; list_node; | |
7790 | + list_node = list_node->next) | |
7791 | + if (list_node->item == disk_node) { | |
7792 | + rc = 0; | |
7793 | + break; | |
7794 | + } | |
7795 | + } | |
7796 | + if (!rc) { | |
7797 | + int done; | |
7798 | + /* allocate a io buffer upto 64Kbytes in size */ | |
7799 | + if (tmp.sector_count < max_io_size) | |
7800 | + max_io_size = tmp.sector_count; | |
7801 | + do { | |
7802 | + done = TRUE; | |
7803 | + /* allocate buffer large enough to hold max_io_size sectors */ | |
7804 | + io_buffer = | |
7805 | + kmalloc(max_io_size << EVMS_VSECTOR_SIZE_SHIFT, | |
7806 | + GFP_KERNEL); | |
7807 | + if (!io_buffer) { | |
7808 | + max_io_size >>= 1; /* allocation failed: retry with half the chunk size */ | |
7809 | + if (!max_io_size) { | |
7810 | + rc = -ENOMEM; | |
7811 | + } else { | |
7812 | + done = FALSE; | |
7813 | + } | |
7814 | + } | |
7815 | + } while (!done); | |
7816 | + } | |
7817 | + /* perform io with specified disk */ | |
7818 | + if (!rc) { | |
7819 | + u64 io_sector_offset, io_remaining; | |
7820 | + u64 io_bytes; | |
7821 | + u_char *user_buffer_ptr; | |
7822 | + | |
7823 | + io_remaining = tmp.sector_count; | |
7824 | + io_sector_offset = 0; | |
7825 | + user_buffer_ptr = tmp.buffer_address; | |
7826 | + while (io_remaining) { | |
7827 | + /* compute the io_size for this pass */ | |
7828 | + io_size = (io_remaining >= max_io_size) ? | |
7829 | + max_io_size : io_remaining; | |
7830 | + | |
7831 | + io_bytes = io_size << EVMS_VSECTOR_SIZE_SHIFT; | |
7832 | + /* for writes, copy this pass's chunk from user to kernel */ | |
7833 | + if (tmp.io_flag == EVMS_SECTOR_IO_WRITE) { | |
7834 | + /* copy chunk from user data buffer */ | |
7835 | + if (copy_from_user(io_buffer, | |
7836 | + user_buffer_ptr, io_bytes)) | |
7837 | + rc = -EFAULT; | |
7838 | + } | |
7839 | + if (rc) | |
7840 | + break; | |
7841 | + | |
7842 | + /* perform IO on this chunk (io_size sectors) */ | |
7843 | + rc = INIT_IO(disk_node, | |
7844 | + tmp.io_flag, | |
7845 | + io_sector_offset + tmp.starting_sector, | |
7846 | + io_size, io_buffer); | |
7847 | + | |
7848 | + if (rc) | |
7849 | + break; | |
7850 | + | |
7851 | + if (tmp.io_flag != EVMS_SECTOR_IO_WRITE) { | |
7852 | + /* copy chunk to user data buffer */ | |
7853 | + if (copy_to_user(user_buffer_ptr, | |
7854 | + io_buffer, io_bytes)) | |
7855 | + rc = -EFAULT; | |
7856 | + } | |
7857 | + if (rc) | |
7858 | + break; | |
7859 | + | |
7860 | + user_buffer_ptr += io_bytes; | |
7861 | + tmp.buffer_address += io_bytes; /* the advanced address is what gets written back in the packet below */ | |
7862 | + io_sector_offset += io_size; | |
7863 | + io_remaining -= io_size; | |
7864 | + } | |
7865 | + } | |
7866 | + | |
7867 | + /* if the io_buffer was allocated, free it */ | |
7868 | + if (io_buffer) | |
7869 | + kfree(io_buffer); | |
7870 | + | |
7871 | + /* copy the status value (and the rest of the packet) back to the user */ | |
7872 | + tmp.status = rc; | |
7873 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7874 | + rc = -EFAULT; | |
7875 | + | |
7876 | + return (rc); | |
7877 | +} | |
7878 | + /* Cursor for EVMS_GET_MINOR: the minor most recently reported to user space. */ | |
7879 | +static int user_minor; | |
7880 | +static int | |
7881 | +evms_ioctl_cmd_get_minor(void *arg) /* EVMS_GET_MINOR: report the first/next minor that has a volume; 0 or -EFAULT */ | |
7882 | +{ | |
7883 | + int rc = 0; | |
7884 | + struct evms_user_minor_pkt tmp, *user_parms; | |
7885 | + memset(&tmp, 0, sizeof (tmp)); /* whole packet is copied out below; tmp.minor is unset when no volume is found */ | |
7886 | + user_parms = (struct evms_user_minor_pkt *) arg; | |
7887 | + /* copy user's parameters to kernel space */ | |
7888 | + if (copy_from_user | |
7889 | + (&tmp.command, &user_parms->command, sizeof (tmp.command))) | |
7890 | + rc = -EFAULT; | |
7891 | + | |
7892 | + if (!rc) { | |
7893 | + if (tmp.command == EVMS_FIRST_VOLUME) | |
7894 | + user_minor = 1; | |
7895 | + else /* tmp.command == EVMS_NEXT_VOLUME */ | |
7896 | + user_minor++; | |
7897 | + | |
7898 | + tmp.status = EVMS_VOLUME_INVALID; | |
7899 | + for (; user_minor < MAX_EVMS_VOLUMES; user_minor++) { | |
7900 | + struct evms_logical_volume *lv; | |
7901 | + | |
7902 | + lv = &evms_logical_volumes[user_minor]; | |
7903 | + /* see if any corrupt volumes have been | |
7904 | + * unmounted. If so, clean up the | |
7905 | + * evms_logical_volumes array entry, and | |
7906 | + * don't report the volume to the user. | |
7907 | + */ | |
7908 | + if (lv->flags & EVMS_VOLUME_CORRUPT) { | |
7909 | + if (!is_open(user_minor)) { | |
7910 | + /* clear logical volume structure | |
7911 | + * for this volume so it may be | |
7912 | + * reused. | |
7913 | + */ | |
7914 | + LOG_WARNING | |
7915 | + ("ioctl_get_minor: found unmounted %s volume(%u,%u,%s).\n", | |
7916 | + ((lv-> | |
7917 | + flags & EVMS_VOLUME_SOFT_DELETED) | |
7918 | + ? "'soft deleted'" : ""), | |
7919 | + EVMS_MAJOR, user_minor, lv->name); | |
7920 | + LOG_WARNING | |
7921 | + (" releasing minor(%d) used by volume(%s)!\n", | |
7922 | + user_minor, lv->name); | |
7923 | + kfree(lv->name); | |
7924 | + lv->name = NULL; | |
7925 | + lv->flags = 0; | |
7926 | + } | |
7927 | + } | |
7928 | + if (lv->node || (lv->flags & EVMS_VOLUME_CORRUPT)) { /* flags were just cleared for released entries, so those are skipped */ | |
7929 | + tmp.status = EVMS_VOLUME_VALID; | |
7930 | + tmp.minor = user_minor; | |
7931 | + break; | |
7932 | + } | |
7933 | + } | |
7934 | + | |
7935 | + /* copy info to userspace */ | |
7936 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7937 | + rc = -EFAULT; | |
7938 | + } | |
7939 | + return (rc); | |
7940 | +} | |
7941 | + /* EVMS_GET_VOLUME_DATA handler: return the flags and device-node path of the volume at tmp.minor. Returns 0, -EFAULT, or -ENODEV when no volume exists at that minor. */ | |
7942 | +static int | |
7943 | +evms_ioctl_cmd_get_volume_data(void *arg) | |
7944 | +{ | |
7945 | + int rc = 0; | |
7946 | + struct evms_volume_data_pkt tmp, *user_parms; | |
7947 | + struct evms_logical_volume *volume = NULL; | |
7948 | + | |
7949 | + user_parms = (struct evms_volume_data_pkt *) arg; | |
7950 | + /* copy user's parameters to kernel space */ | |
7951 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
7952 | + rc = -EFAULT; | |
7953 | + /* tmp.minor comes from user space: reject anything outside evms_logical_volumes[] before indexing it */ | |
7954 | + if (!rc && (unsigned long) tmp.minor >= MAX_EVMS_VOLUMES) | |
7955 | + rc = -ENODEV; | |
7956 | + if (!rc) { | |
7957 | + volume = &evms_logical_volumes[tmp.minor]; | |
7958 | + if (volume->node == NULL) | |
7959 | + rc = -ENODEV; | |
7960 | + } | |
7961 | + if (!rc) { | |
7962 | + tmp.flags = volume->flags; | |
7963 | + strcpy(tmp.volume_name, EVMS_DEV_NODE_PATH); | |
7964 | + strcat(tmp.volume_name, volume->name); /* NOTE(review): unbounded; assumes volume_name can hold path + name - confirm */ | |
7965 | + } | |
7966 | + | |
7967 | + /* copy return code and info to userspace */ | |
7968 | + tmp.status = rc; | |
7969 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
7970 | + rc = -EFAULT; | |
7971 | + return (rc); | |
7972 | +} | |
7973 | + /* Cursor for EVMS_GET_PLUGIN: the registered-plugin record last reported to user space. */ | |
7974 | +static struct evms_registered_plugin *ioctl_reg_record; | |
7975 | +static int | |
7976 | +evms_ioctl_cmd_get_plugin(void *arg) /* EVMS_GET_PLUGIN: report id/version of the first/next registered plugin; 0 or -EFAULT */ | |
7977 | +{ | |
7978 | + int rc = 0; | |
7979 | + struct evms_kernel_plugin_pkt tmp, *user_parms; | |
7980 | + memset(&tmp, 0, sizeof (tmp)); /* whole packet is copied out below; tmp.version is unset at end of list */ | |
7981 | + user_parms = (struct evms_kernel_plugin_pkt *) arg; | |
7982 | + /* copy user's parameters to kernel space */ | |
7983 | + if (copy_from_user | |
7984 | + (&tmp.command, &user_parms->command, sizeof (tmp.command))) | |
7985 | + rc = -EFAULT; | |
7986 | + | |
7987 | + if (!rc) { | |
7988 | + /* if the command is not 0, then verify | |
7989 | + * that ioctl_reg_record is pointing to | |
7990 | + * current and valid plugin header. | |
7991 | + */ | |
7992 | + if (tmp.command) { /* tmp.command == EVMS_NEXT_PLUGIN */ | |
7993 | + struct evms_registered_plugin *tmp_reg_record; | |
7994 | + tmp_reg_record = registered_plugin_head; | |
7995 | + /* search the current plugin list */ | |
7996 | + while (tmp_reg_record) { | |
7997 | + if (tmp_reg_record == ioctl_reg_record) | |
7998 | + break; | |
7999 | + tmp_reg_record = tmp_reg_record->next; | |
8000 | + } | |
8001 | + /* if the ioctl_reg_record is not in the | |
8002 | + * current list, then start at the beginning. | |
8003 | + */ | |
8004 | + if (!tmp_reg_record) | |
8005 | + tmp.command = EVMS_FIRST_PLUGIN; | |
8006 | + } | |
8007 | + | |
8008 | + if (tmp.command == EVMS_FIRST_PLUGIN) | |
8009 | + /* start at beginning of plugin list */ | |
8010 | + ioctl_reg_record = registered_plugin_head; | |
8011 | + else /* tmp.command == EVMS_NEXT_PLUGIN */ | |
8012 | + /* continue from current position in list */ | |
8013 | + ioctl_reg_record = ioctl_reg_record->next; | |
8014 | + | |
8015 | + tmp.status = EVMS_PLUGIN_INVALID; | |
8016 | + tmp.id = 0; | |
8017 | + if (ioctl_reg_record) { | |
8018 | + tmp.id = ioctl_reg_record->plugin->id; | |
8019 | + tmp.version = ioctl_reg_record->plugin->version; | |
8020 | + tmp.status = EVMS_PLUGIN_VALID; | |
8021 | + } | |
8022 | + | |
8023 | + /* copy info to userspace */ | |
8024 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
8025 | + rc = -EFAULT; | |
8026 | + } | |
8027 | + return (rc); | |
8028 | +} | |
8029 | + /* EVMS_PLUGIN_IOCTL handler: forward the ioctl to the registered plugin whose id equals tmp.feature_id. Returns the plugin's result, -ENOSYS if it has no direct_ioctl entry point, -ENOPKG if no plugin matches, or -EFAULT. */ | |
8030 | +static int | |
8031 | +evms_ioctl_cmd_plugin_ioctl(struct inode *inode, | |
8032 | + struct file *file, | |
8033 | + unsigned int cmd, unsigned long arg) | |
8034 | +{ | |
8035 | + int rc = 0, found = FALSE; | |
8036 | + struct evms_plugin_ioctl_pkt tmp, *user_parms; | |
8037 | + struct evms_registered_plugin *p; | |
8038 | + | |
8039 | + user_parms = (struct evms_plugin_ioctl_pkt *) arg; | |
8040 | + /* copy user's parameters to kernel space */ | |
8041 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
8042 | + rc = -EFAULT; | |
8043 | + | |
8044 | + if (!rc) { | |
8045 | + /* search for the specified plugin */ | |
8046 | + for (p = registered_plugin_head; p; p = p->next) | |
8047 | + /* check for the specified feature id */ | |
8048 | + if (p->plugin->id == tmp.feature_id) { | |
8049 | + found = TRUE; | |
8050 | + /* check that entry point is used */ | |
8051 | + if (p->plugin->fops->direct_ioctl) | |
8052 | + rc = DIRECT_IOCTL(p, inode, file, cmd, | |
8053 | + arg); /* the plugin is handed the original user-space arg, not tmp */ | |
8054 | + else | |
8055 | + rc = -ENOSYS; | |
8056 | + break; | |
8057 | + } | |
8058 | + /* was the specified plugin found? */ | |
8059 | + if (found == FALSE) | |
8060 | + rc = -ENOPKG; | |
8061 | + | |
8062 | + /* copy the status value back to the user */ | |
8063 | + tmp.status = rc; | |
8064 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) /* NOTE(review): rewrites the whole packet from the copy taken before the plugin ran - confirm plugins do not update it in place */ | |
8065 | + rc = -EFAULT; | |
8066 | + } | |
8067 | + return (rc); | |
8068 | +} | |
8069 | + /* EVMS_COMPUTE_CSUM handler: checksum tmp.buffer_size bytes of a user buffer with csum_partial(), at most MAX_BUFFER_SIZE bytes per pass, seeded with tmp.insum; the result goes to tmp.outsum. Returns 0 or a negative errno, mirrored in tmp.status. */ | |
8070 | +#define MAX_BUFFER_SIZE 65536 | |
8071 | +static int | |
8072 | +evms_ioctl_cmd_kernel_partial_csum(void *arg) | |
8073 | +{ | |
8074 | + int rc = 0; | |
8075 | + u64 compute_size = MAX_BUFFER_SIZE; | |
8076 | + struct evms_compute_csum_pkt tmp, *user_parms; | |
8077 | + unsigned char *buffer = NULL; | |
8078 | + | |
8079 | + user_parms = (struct evms_compute_csum_pkt *) arg; | |
8080 | + /* copy user's parameters to kernel space */ | |
8081 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
8082 | + rc = -EFAULT; | |
8083 | + | |
8084 | + if (!rc) { | |
8085 | + /* allocate a bounce buffer upto 64Kbytes in size */ | |
8086 | + if (tmp.buffer_size < MAX_BUFFER_SIZE) | |
8087 | + compute_size = tmp.buffer_size; | |
8088 | + | |
8089 | + /* allocate buffer large enough to hold one pass of data */ | |
8090 | + buffer = kmalloc(compute_size, GFP_KERNEL); | |
8091 | + if (!buffer) { | |
8092 | + rc = -ENOMEM; | |
8093 | + } | |
8094 | + } | |
8095 | + /* checksum the user buffer one pass at a time */ | |
8096 | + if (!rc) { | |
8097 | + u64 remaining_bytes; | |
8098 | + u_char *user_buffer_ptr; | |
8099 | + unsigned int insum = tmp.insum; | |
8100 | + | |
8101 | + remaining_bytes = tmp.buffer_size; | |
8102 | + user_buffer_ptr = tmp.buffer_address; | |
8103 | + while (remaining_bytes) { | |
8104 | + /* compute the compute_size for this pass */ | |
8105 | + compute_size = (remaining_bytes >= MAX_BUFFER_SIZE) ? | |
8106 | + MAX_BUFFER_SIZE : remaining_bytes; | |
8107 | + | |
8108 | + /* copy into kernel from user data buffer */ | |
8109 | + if (copy_from_user(buffer, user_buffer_ptr, | |
8110 | + compute_size)) | |
8111 | + rc = -EFAULT; | |
8112 | + if (rc) | |
8113 | + break; | |
8114 | + /* checksum only the bytes copied this pass; using buffer_size here would read past the bounce buffer */ | |
8115 | + tmp.outsum = csum_partial(buffer, compute_size, | |
8116 | + insum); | |
8117 | + /* set up for another possible pass */ | |
8118 | + insum = tmp.outsum; | |
8119 | + /* update loop progress variables */ | |
8120 | + user_buffer_ptr += compute_size; | |
8121 | + tmp.buffer_address += compute_size; /* the advanced address is written back in the packet below */ | |
8122 | + remaining_bytes -= compute_size; | |
8123 | + } | |
8124 | + } | |
8125 | + | |
8126 | + /* if the bounce buffer was allocated, free it */ | |
8127 | + if (buffer) | |
8128 | + kfree(buffer); | |
8129 | + | |
8130 | + /* copy the status value back to the user */ | |
8131 | + tmp.status = rc; | |
8132 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
8133 | + rc = -EFAULT; | |
8134 | + | |
8135 | + return (rc); | |
8136 | +} | |
8137 | + | |
8138 | +#undef MAX_BUFFER_SIZE | |
8139 | + /* EVMS_GET_BMAP handler: copy the caller's bmap packet into the kernel, pass it down the stack of the volume behind inode's minor, and copy the packet (with status) back. Returns the stack's result or -EFAULT. */ | |
8140 | +static int | |
8141 | +evms_ioctl_cmd_get_bmap(struct inode *inode, | |
8142 | + struct file *file, unsigned int cmd, unsigned long arg) | |
8143 | +{ | |
8144 | + int rc = 0; | |
8145 | + struct evms_get_bmap_pkt tmp, *user_parms; | |
8146 | + | |
8147 | + user_parms = (struct evms_get_bmap_pkt *) arg; | |
8148 | + /* copy user's parameters to kernel space */ | |
8149 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
8150 | + rc = -EFAULT; | |
8151 | + | |
8152 | + /* pass the ioctl down the volume stack */ | |
8153 | + if (!rc) { | |
8154 | + struct evms_logical_volume *volume; | |
8155 | + | |
8156 | + volume = &evms_logical_volumes[MINOR(inode->i_rdev)]; | |
8157 | + rc = IOCTL(volume->node, inode, file, cmd, | |
8158 | + (unsigned long) &tmp); /* lower layers get the kernel copy of the packet, not the user pointer */ | |
8159 | + } | |
8160 | + /* copy the status value back to the user */ | |
8161 | + tmp.status = rc; | |
8162 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
8163 | + rc = -EFAULT; | |
8164 | + | |
8165 | + return (rc); | |
8166 | +} | |
8167 | + | |
8168 | +static int | |
8169 | +evms_ioctl_cmd_process_notify_event(unsigned long arg) | |
8170 | +{ | |
8171 | + int rc = 0, found = FALSE; | |
8172 | + struct evms_notify_pkt tmp, *user_parms; | |
8173 | + struct evms_list_node **list_node = NULL; | |
8174 | + struct evms_event *event = NULL; | |
8175 | + | |
8176 | + user_parms = (struct evms_notify_pkt *) arg; | |
8177 | + /* copy user's parameters to kernel space */ | |
8178 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
8179 | + rc = -EFAULT; | |
8180 | + | |
8181 | + /* check to see if PID has already been registered | |
8182 | + * for this event. | |
8183 | + */ | |
8184 | + if (!rc) { | |
8185 | + list_node = &evms_global_notify_list; | |
8186 | + while (*list_node) { | |
8187 | + event = (*list_node)->item; | |
8188 | + if ((event->pid == tmp.eventry.pid) && | |
8189 | + (event->eventid == tmp.eventry.eventid)) { | |
8190 | + found = TRUE; | |
8191 | + break; | |
8192 | + } | |
8193 | + list_node = &(*list_node)->next; | |
8194 | + } | |
8195 | + } | |
8196 | + if (tmp.command) { /* tmp.command == EVMS_REGISTER_EVENT */ | |
8197 | + /* registration code */ | |
8198 | + if (found) { | |
8199 | + rc = -EBUSY; | |
8200 | + LOG_ERROR | |
8201 | + ("error(%d) pid(%d) already register to receive signal(%d) on event(%d).\n", | |
8202 | + rc, tmp.eventry.pid, tmp.eventry.signo, | |
8203 | + tmp.eventry.eventid); | |
8204 | + } else { | |
8205 | + /* register this pid/event type */ | |
8206 | + event = kmalloc(sizeof (struct evms_event), GFP_KERNEL); | |
8207 | + if (!event) { | |
8208 | + rc = -ENOMEM; | |
8209 | + LOG_ERROR | |
8210 | + ("error(%d) allocating event structure.\n", | |
8211 | + rc); | |
8212 | + } else { | |
8213 | + memset(event, 0, sizeof (struct evms_event)); | |
8214 | + event->pid = tmp.eventry.pid; | |
8215 | + event->eventid = tmp.eventry.eventid; | |
8216 | + event->signo = tmp.eventry.signo; | |
8217 | + rc = evms_cs_add_item_to_list | |
8218 | + (&evms_global_notify_list, event); | |
8219 | + } | |
8220 | + } | |
8221 | + } else { /* tmp.command == EVMS_UNREGISTER_EVENT */ | |
8222 | + /* unregistration code */ | |
8223 | + if (!found) { | |
8224 | + rc = -ENODATA; | |
8225 | + LOG_ERROR | |
8226 | + ("error(%d) attempting to unregister a non-registered pid(%d) on event(%d).\n", | |
8227 | + rc, tmp.eventry.pid, tmp.eventry.eventid); | |
8228 | + } else { | |
8229 | + event = (*list_node)->item; | |
8230 | + rc = evms_cs_remove_item_from_list | |
8231 | + (&evms_global_notify_list, event); | |
8232 | + if (!rc) { | |
8233 | + kfree(event); | |
8234 | + } | |
8235 | + } | |
8236 | + } | |
8237 | + /* copy the status value back to the user */ | |
8238 | + tmp.status = rc; | |
8239 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
8240 | + rc = -EFAULT; | |
8241 | + | |
8242 | + return (rc); | |
8243 | +} | |
8244 | + /* EVMS_CHECK_MOUNT_STATUS handler: report in pkt.mounted whether the EVMS device with minor pkt.minor is mounted. Returns 0 or -EFAULT. */ | |
8245 | +static int | |
8246 | +evms_ioctl_cmd_check_mount_status(struct inode *inode, struct file *file, | |
8247 | + ulong arg) | |
8248 | +{ | |
8249 | + struct evms_mount_status_pkt pkt; | |
8250 | + struct evms_mount_status_pkt *upkt = (struct evms_mount_status_pkt *) arg; | |
8251 | + int err = 0; | |
8252 | + | |
8253 | + /* pull the request packet into kernel memory, then query the mount state */ | |
8254 | + if (copy_from_user(&pkt, upkt, sizeof (pkt)) != 0) | |
8255 | + err = -EFAULT; | |
8256 | + else if (is_mounted(MKDEV(EVMS_MAJOR, pkt.minor))) | |
8257 | + pkt.mounted = TRUE; | |
8258 | + else | |
8259 | + pkt.mounted = FALSE; | |
8260 | + | |
8261 | + /* hand the packet back to the caller with the outcome in .status; */ | |
8262 | + /* a failed copy-out overrides any earlier result */ | |
8263 | + pkt.status = err; | |
8264 | + if (copy_to_user(upkt, &pkt, sizeof (pkt)) != 0) | |
8265 | + err = -EFAULT; | |
8266 | + | |
8267 | + return err; | |
8268 | +} | |
8269 | + /* EVMS_CHECK_OPEN_STATUS handler: store the result of is_open(pkt.minor) in pkt.opens for the caller. Returns 0 or -EFAULT. */ | |
8270 | +static int | |
8271 | +evms_ioctl_cmd_check_open_status(struct inode *inode, struct file *file, | |
8272 | + ulong arg) | |
8273 | +{ | |
8274 | + struct evms_open_status_pkt pkt; | |
8275 | + struct evms_open_status_pkt *upkt = (struct evms_open_status_pkt *) arg; | |
8276 | + int err = -EFAULT; | |
8277 | + | |
8278 | + /* pull the request packet into kernel memory */ | |
8279 | + if (copy_from_user(&pkt, upkt, sizeof (pkt)) == 0) { | |
8280 | + /* readable request: record what is_open() reports */ | |
8281 | + pkt.opens = is_open(pkt.minor); | |
8282 | + err = 0; | |
8283 | + } | |
8284 | + | |
8285 | + /* hand the packet back to the caller with the outcome in .status; */ | |
8286 | + /* a failed copy-out overrides any earlier result */ | |
8287 | + pkt.status = err; | |
8288 | + if (copy_to_user(upkt, &pkt, sizeof (pkt)) != 0) | |
8289 | + err = -EFAULT; | |
8290 | + | |
8291 | + return err; | |
8292 | +} | |
8293 | + | |
8294 | +/************************************************/ | |
8295 | +/* END -- IOCTL commands -- EVMS specific */ | |
8296 | +/************************************************/ | |
8297 | + | |
8298 | +/************************************************/ | |
8299 | +/* START -- IOCTL commands -- Volume specific */ | |
8300 | +/************************************************/ | |
8301 | + | |
8302 | +/************************************************/ | |
8303 | +/* END -- IOCTL commands -- Volume specific */ | |
8304 | +/************************************************/ | |
8305 | + | |
8306 | +/************************************************/ | |
8307 | +/* START -- IOCTL main */ | |
8308 | +/************************************************/ | |
8309 | + | |
8310 | +/* | |
8311 | + * Function: evms_ioctl | |
8312 | + * This function is the main ioctl entry point for all of evms. | |
8313 | + * Callers need CAP_SYS_ADMIN. Minor 0 takes the EVMS-wide commands; any other minor must have a volume and takes block/volume commands. Returns 0 or a negative errno. | |
8314 | + */ | |
8315 | + | |
8316 | +static int | |
8317 | +evms_ioctl(struct inode *inode, | |
8318 | + struct file *file, unsigned int cmd, unsigned long arg) | |
8319 | +{ | |
8320 | + unsigned long minor = 0; | |
8321 | + int rc = 0; | |
8322 | + struct evms_logical_node *node = NULL; /* top node of the addressed volume; stays NULL for minor 0 */ | |
8323 | + | |
8324 | + /* check user access */ | |
8325 | + if (!capable(CAP_SYS_ADMIN)) | |
8326 | + rc = -EACCES; | |
8327 | + | |
8328 | + if (!inode) | |
8329 | + rc = -EINVAL; | |
8330 | + | |
8331 | + if (!rc) { | |
8332 | + /* get the minor */ | |
8333 | + minor = MINOR(inode->i_rdev); | |
8334 | + LOG_EXTRA | |
8335 | + ("ioctl: minor(%lu), dir(%d), size(%d), type(%d), nr(%d)\n", | |
8336 | + minor, (cmd >> _IOC_DIRSHIFT) & _IOC_DIRMASK, | |
8337 | + (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK, | |
8338 | + (cmd >> _IOC_TYPESHIFT) & _IOC_TYPEMASK, | |
8339 | + (cmd >> _IOC_NRSHIFT) & _IOC_NRMASK); | |
8340 | + | |
8341 | + /* insure this minor points to a valid volume */ | |
8342 | + if (minor) { | |
8343 | + node = evms_logical_volumes[minor].node; | |
8344 | + if (node == NULL) | |
8345 | + rc = -ENXIO; | |
8346 | + } | |
8347 | + } | |
8348 | + | |
8349 | + /* process the IOCTL commands */ | |
8350 | + if (!rc) { | |
8351 | + if (!minor) { | |
8352 | + /* process all EVMS specific commands */ | |
8353 | + switch (cmd) { | |
8354 | + case EVMS_GET_IOCTL_VERSION: | |
8355 | + rc = evms_ioctl_cmd_get_ioctl_version((void *) | |
8356 | + arg); | |
8357 | + break; | |
8358 | + case EVMS_GET_VERSION: | |
8359 | + rc = evms_ioctl_cmd_get_version((void *) arg); | |
8360 | + break; | |
8361 | + case EVMS_GET_INFO_LEVEL: | |
8362 | + rc = evms_ioctl_cmd_get_info_level((void *) | |
8363 | + arg); | |
8364 | + break; | |
8365 | + case EVMS_SET_INFO_LEVEL: | |
8366 | + rc = evms_ioctl_cmd_set_info_level((void *) | |
8367 | + arg); | |
8368 | + break; | |
8369 | + case EVMS_REDISCOVER_VOLUMES: | |
8370 | + rc = evms_ioctl_cmd_rediscover_volumes(inode, | |
8371 | + file, | |
8372 | + cmd, | |
8373 | + arg); | |
8374 | + break; | |
8375 | + case EVMS_GET_LOGICAL_DISK: | |
8376 | + rc = evms_ioctl_cmd_get_logical_disk((void *) | |
8377 | + arg); | |
8378 | + break; | |
8379 | + case EVMS_GET_LOGICAL_DISK_INFO: | |
8380 | + rc = evms_ioctl_cmd_get_logical_disk_info((void | |
8381 | + *) | |
8382 | + arg); | |
8383 | + break; | |
8384 | + case EVMS_SECTOR_IO: | |
8385 | + rc = evms_ioctl_cmd_sector_io((void *) arg); | |
8386 | + break; | |
8387 | + case EVMS_GET_MINOR: | |
8388 | + rc = evms_ioctl_cmd_get_minor((void *) arg); | |
8389 | + break; | |
8390 | + case EVMS_GET_VOLUME_DATA: | |
8391 | + rc = evms_ioctl_cmd_get_volume_data((void *) | |
8392 | + arg); | |
8393 | + break; | |
8394 | + case EVMS_DELETE_VOLUME: | |
8395 | + rc = evms_ioctl_cmd_delete_volume(inode, file, | |
8396 | + arg); | |
8397 | + break; | |
8398 | + case EVMS_GET_PLUGIN: | |
8399 | + rc = evms_ioctl_cmd_get_plugin((void *) arg); | |
8400 | + break; | |
8401 | + case EVMS_PLUGIN_IOCTL: | |
8402 | + rc = evms_ioctl_cmd_plugin_ioctl(inode, file, | |
8403 | + cmd, arg); | |
8404 | + break; | |
8405 | + case EVMS_COMPUTE_CSUM: | |
8406 | + rc = evms_ioctl_cmd_kernel_partial_csum((void *) | |
8407 | + arg); | |
8408 | + break; | |
8409 | + case EVMS_PROCESS_NOTIFY_EVENT: | |
8410 | + rc = evms_ioctl_cmd_process_notify_event(arg); | |
8411 | + break; | |
8412 | + case EVMS_CHECK_MOUNT_STATUS: | |
8413 | + rc = evms_ioctl_cmd_check_mount_status(inode, | |
8414 | + file, | |
8415 | + arg); | |
8416 | + break; | |
8417 | + case EVMS_CHECK_OPEN_STATUS: | |
8418 | + rc = evms_ioctl_cmd_check_open_status(inode, | |
8419 | + file, | |
8420 | + arg); | |
8421 | + break; | |
8422 | + default: /* unknown command on the control minor */ | |
8423 | + rc = -EINVAL; | |
8424 | + break; | |
8425 | + } | |
8426 | + } else { | |
8427 | + /* process Volume specific commands */ | |
8428 | + switch (cmd) { | |
8429 | + /* pick up standard blk ioctls */ | |
8430 | + case BLKFLSBUF: | |
8431 | + case BLKROSET: | |
8432 | + case BLKROGET: | |
8433 | + case BLKRASET: | |
8434 | + case BLKRAGET: | |
8435 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10) | |
8436 | + case BLKBSZGET: | |
8437 | + case BLKBSZSET: | |
8438 | +#endif | |
8439 | + case BLKSSZGET: | |
8440 | + rc = blk_ioctl(inode->i_rdev, cmd, arg); | |
8441 | + break; | |
8442 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10) | |
8443 | + case BLKGETSIZE: | |
8444 | + { | |
8445 | + /* casting size down to 32-bits until | |
8446 | + * kernel allows return of 64-bit size | |
8447 | + * values. | |
8448 | + */ | |
8449 | + long size = node->total_vsectors; | |
8450 | + if (copy_to_user | |
8451 | + ((long *) arg, &size, | |
8452 | + sizeof (long))) | |
8453 | + rc = -EFAULT; | |
8454 | + } | |
8455 | + break; | |
8456 | + case BLKGETSIZE64: | |
8457 | + { | |
8458 | + u64 size_in_bytes = | |
8459 | + node-> | |
8460 | + total_vsectors << | |
8461 | + EVMS_VSECTOR_SIZE_SHIFT; | |
8462 | + if (copy_to_user | |
8463 | + ((u64 *) arg, &size_in_bytes, | |
8464 | + sizeof (u64))) | |
8465 | + rc = -EFAULT; | |
8466 | + } | |
8467 | + break; | |
8468 | +#endif | |
8469 | + case EVMS_GET_IOCTL_VERSION: | |
8470 | + rc = evms_ioctl_cmd_get_ioctl_version((void *) | |
8471 | + arg); | |
8472 | + break; | |
8473 | + case EVMS_GET_BMAP: | |
8474 | + rc = evms_ioctl_cmd_get_bmap(inode, file, cmd, | |
8475 | + arg); | |
8476 | + break; | |
8477 | + case EVMS_GET_VOL_STRIPE_INFO: | |
8478 | + { | |
8479 | + struct evms_vol_stripe_info_pkt info; | |
8480 | + | |
8481 | + info.size = | |
8482 | + PAGE_SIZE >> | |
8483 | + EVMS_VSECTOR_SIZE_SHIFT; /* answered here with fixed values: one page, expressed in sectors */ | |
8484 | + info.width = 1; | |
8485 | + if (copy_to_user | |
8486 | + ((struct evms_vol_stripe_info_pkt *) | |
8487 | + arg, &info, sizeof (info))) | |
8488 | + rc = -EFAULT; | |
8489 | + } | |
8490 | + break; | |
8491 | + | |
8492 | + default: /* anything else is passed down the volume's plugin stack */ | |
8493 | + rc = IOCTL(node, inode, file, cmd, arg); | |
8494 | + break; | |
8495 | + } | |
8496 | + } | |
8497 | + } | |
8498 | + return rc; | |
8499 | +} | |
8500 | + | |
8501 | +/************************************************/ | |
8502 | +/* END -- IOCTL main */ | |
8503 | +/************************************************/ | |
8504 | + | |
8505 | +/************************************************/ | |
8506 | +/* START -- CHECK MEDIA CHANGE */ | |
8507 | +/************************************************/ | |
8508 | + | |
8509 | +static int | |
8510 | +evms_check_media_change(kdev_t dev) | |
8511 | +{ | |
8512 | + struct evms_logical_volume *vol; | |
8513 | + int result; | |
8514 | + | |
8515 | + /* media change checks are limited to privileged callers */ | |
8516 | + if (!capable(CAP_SYS_ADMIN)) { | |
8517 | + return -EACCES; | |
8518 | + } | |
8519 | + | |
8520 | + /* the device's minor number selects the entry in the | |
8521 | + * logical volume table; an entry with no node is not | |
8522 | + * a valid volume. | |
8523 | + */ | |
8524 | + vol = &evms_logical_volumes[MINOR(dev)]; | |
8525 | + if (vol->node == NULL) { | |
8526 | + return -ENXIO; | |
8527 | + } | |
8528 | + | |
8529 | + /* volumes not flagged removable report 0 without an ioctl */ | |
8530 | + if (!(vol->flags & EVMS_DEVICE_REMOVABLE)) { | |
8531 | + return 0; | |
8532 | + } | |
8533 | + | |
8534 | + /* forward the check to the volume's node */ | |
8535 | + result = evms_cs_kernel_ioctl(vol->node, EVMS_CHECK_MEDIA_CHANGE, | |
8536 | + (unsigned long) NULL); | |
8537 | + if (result < 0) { | |
8538 | + LOG_ERROR("error(%d) doing EVMS_CHECK_MEDIA_CHANGE ioctl on '%s'.\n", | |
8539 | + result, vol->name); | |
8540 | + } | |
8541 | + return result; | |
8542 | +} | |
8543 | + | |
8544 | +/************************************************/ | |
8545 | +/* END -- CHECK MEDIA CHANGE */ | |
8546 | +/************************************************/ | |
8547 | + | |
8548 | +static int | |
8549 | +evms_check_for_device_changes(struct inode *inode, struct file *file) | |
8550 | +{ | |
8551 | + int rc = 0, something_changed = 0, i; | |
8552 | + struct evms_rediscover_pkt kernel_rd_pckt = { 0, 0, NULL }; | |
8553 | + struct evms_list_node *disk_list = NULL, *lnode, *next_lnode; | |
8554 | + struct evms_logical_node *disk, *new_device_list = NULL; | |
8555 | + struct evms_logical_volume *volume = NULL; | |
8556 | + /* scan for new, changed and unplugged devices, then rediscover what changed */ | |
8557 | + /* check for new devices | |
8558 | + * | |
8559 | + * put all new devices on the disk list so they | |
8560 | + * will be included in the rediscovery process. | |
8561 | + */ | |
8562 | + static void evms_discover_logical_disks(struct evms_logical_node **); | |
8563 | + evms_discover_logical_disks(&new_device_list); | |
8564 | + if (new_device_list) { | |
8565 | + LOG_DETAILS("%s: new devices detected.\n", __FUNCTION__); | |
8566 | + something_changed++; | |
8567 | + /* put these new nodes on the disk list */ | |
8568 | + while (new_device_list) { | |
8569 | + disk = new_device_list; | |
8570 | + rc = evms_cs_remove_logical_node_from_list | |
8571 | + (&new_device_list, disk); | |
8572 | + if (rc) { | |
8573 | + LOG_ERROR | |
8574 | + ("%s: error(%d) removing device(%s) from list.\n", | |
8575 | + __FUNCTION__, rc, disk->name); | |
8576 | + } | |
8577 | + rc = evms_cs_add_item_to_list(&disk_list, disk); | |
8578 | + if (rc) { | |
8579 | + LOG_ERROR | |
8580 | + ("%s: error(%d) adding device(%s) to list.\n", | |
8581 | + __FUNCTION__, rc, disk->name); | |
8582 | + } | |
8583 | + } | |
8584 | + } | |
8585 | + | |
8586 | + /* check all devices for changed removable media | |
8587 | + * | |
8588 | + * scan the global device list and issue check | |
8589 | + * media change on each removable media device. | |
8590 | + * put all removable devices that indicate a | |
8591 | + * media change on the disk list. | |
8592 | + * | |
8593 | + * also scan for devices that have been unplugged | |
8594 | + * or contain corrupt volumes. | |
8595 | + */ | |
8596 | + for (lnode = evms_global_device_list; lnode; lnode = lnode->next) { | |
8597 | + int add_to_list = FALSE; | |
8598 | + disk = (struct evms_logical_node *) lnode->item; | |
8599 | + /* only really check removable media devices */ | |
8600 | + if (disk->flags & EVMS_DEVICE_REMOVABLE) { | |
8601 | + /* check for media change */ | |
8602 | + rc = evms_cs_kernel_ioctl(disk, | |
8603 | + EVMS_CHECK_MEDIA_CHANGE, | |
8604 | + (unsigned long) NULL); | |
8605 | + if (rc < 0) { | |
8606 | + LOG_ERROR | |
8607 | + ("%s: error(%d) doing EVMS_CHECK_MEDIA_CHANGE ioctl on '%s'.\n", | |
8608 | + __FUNCTION__, rc, disk->name); | |
8609 | + } else if (rc == 1) { | |
8610 | + add_to_list = TRUE; | |
8611 | + } | |
8612 | + } | |
8613 | + /* check for devices that were present | |
8614 | + * before but are gone (unplugged | |
8615 | + * device or unloaded driver). | |
8616 | + */ | |
8617 | + rc = IOCTL(disk, inode, file, | |
8618 | + EVMS_CHECK_DEVICE_STATUS, (ulong) NULL); | |
8619 | + if (rc) { | |
8620 | + LOG_ERROR | |
8621 | + ("error(%d) doing EVMS_CHECK_DEVICE_STATUS ioctl on '%s'.\n", | |
8622 | + rc, disk->name); | |
8623 | + } | |
8624 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
8625 | + add_to_list = TRUE; | |
8626 | + } | |
8627 | + if (add_to_list) { | |
8628 | + something_changed++; | |
8629 | + rc = evms_cs_add_item_to_list(&disk_list, disk); | |
8630 | + } | |
8631 | + } | |
8632 | + /* log a statement that we detected changed media. | |
8633 | + */ | |
8634 | + if (disk_list) { | |
8635 | + LOG_DETAILS("%s: media change detected.\n", __FUNCTION__); | |
8636 | + } | |
8637 | + | |
8638 | + /* check for volumes with removed removable media. | |
8639 | + * mark the volumes that reside on changed media. | |
8640 | + */ | |
8641 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
8642 | + volume = &evms_logical_volumes[i]; | |
8643 | + if (!volume->node) | |
8644 | + continue; | |
8645 | + if (!(volume->flags & EVMS_DEVICE_REMOVABLE)) | |
8646 | + continue; | |
8647 | + if (evms_check_media_change(MKDEV(EVMS_MAJOR, i)) <= 0) | |
8648 | + continue; | |
8649 | + /* remember which volumes have changed media */ | |
8650 | + volume->flags |= EVMS_MEDIA_CHANGED; | |
8651 | + something_changed++; | |
8652 | + } | |
8653 | + | |
8654 | + /* check for removed devices */ | |
8655 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
8656 | + int status; | |
8657 | + volume = &evms_logical_volumes[i]; | |
8658 | + if (!volume->node) | |
8659 | + continue; | |
8660 | + /* check for device status */ | |
8661 | + status = 0; | |
8662 | + rc = IOCTL(volume->node, inode, file, | |
8663 | + EVMS_CHECK_DEVICE_STATUS, (ulong) & status); | |
8664 | + if (rc) { | |
8665 | + LOG_ERROR | |
8666 | + ("error(%d) doing EVMS_CHECK_DEVICE_STATUS ioctl on '%s'.\n", | |
8667 | + rc, volume->name); | |
8668 | + continue; | |
8669 | + } | |
8670 | + if (!(status & EVMS_DEVICE_UNAVAILABLE)) { | |
8671 | + continue; | |
8672 | + } | |
8673 | + /* remember which volumes have unplugged devices */ | |
8674 | + volume->flags |= EVMS_DEVICE_UNPLUGGED; | |
8675 | + something_changed++; | |
8676 | + } | |
8677 | + | |
8678 | + /* do we have some work to do? */ | |
8679 | + if (something_changed) { | |
8680 | + /* check for volumes to be deleted */ | |
8681 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
8682 | + struct evms_quiesce_vol_pkt qv; | |
8683 | + | |
8684 | + volume = &evms_logical_volumes[i]; | |
8685 | + if (!volume->node) | |
8686 | + continue; | |
8687 | + /* only proceed on volumes with: | |
8688 | + * changed media, | |
8689 | + * hot-unplugged devices, | |
8690 | + * & partial volumes | |
8691 | + */ | |
8692 | + if (!(volume->flags & | |
8693 | + (EVMS_MEDIA_CHANGED | | |
8694 | + EVMS_VOLUME_PARTIAL | EVMS_DEVICE_UNPLUGGED))) | |
8695 | + continue; | |
8696 | + /* gather the disk's needing to be | |
8697 | + * rediscovered to rebuild this | |
8698 | + * volume. | |
8699 | + * | |
8700 | + * this will locate other disks that | |
8701 | + * the volume resides on that don't | |
8702 | + * indicate media change. | |
8703 | + */ | |
8704 | + rc = evms_cs_kernel_ioctl(volume->node, | |
8705 | + EVMS_GET_DISK_LIST, | |
8706 | + (unsigned long) &disk_list); | |
8707 | + if (rc) { | |
8708 | + LOG_ERROR | |
8709 | + ("%s: error(%d) retrieving underlying disk list for '%s', skipping ...\n", | |
8710 | + __FUNCTION__, rc, volume->name); | |
8711 | + continue; | |
8712 | + } | |
8713 | + /* quiesce all the changed volumes | |
8714 | + * prior to being deleted. | |
8715 | + */ | |
8716 | + qv.command = 1; // quiesce | |
8717 | + qv.minor = i; // | |
8718 | + qv.status = 0; // reset status | |
8719 | + qv.do_vfs = 0; | |
8720 | + rc = evms_quiesce_volume(volume, inode, file, &qv); | |
8721 | + if (rc) { | |
8722 | + LOG_ERROR | |
8723 | + ("%s: error(%d) attempting to quiesce '%s%s'.\n", | |
8724 | + __FUNCTION__, rc, EVMS_DEV_NODE_PATH, | |
8725 | + volume->name); | |
8726 | + } | |
8727 | + } | |
8728 | + | |
8729 | + /* we need to revalidate all the changed | |
8730 | + * media. this is accomplished by issuing | |
8731 | + * the revalidate disk ioctl to each device | |
8732 | + * with changed media. the device manager | |
8733 | + * remembers which devices indicated | |
8734 | + * media changed (set by check media | |
8735 | + * changed ioctl issued earlier), and will | |
8736 | + * only issue the revalidate disk ioctl to | |
8737 | + * those disks one time. | |
8738 | + * | |
8739 | + * NOTE: | |
8740 | + * this needs to be done BEFORE deleting | |
8741 | + * the volumes because deleting the | |
8742 | + * last segment on disk will cause the | |
8743 | + * associated disk node to be freed, and we | |
8744 | + * will not be able to issue the | |
8745 | + * revalidate disk ioctl after that. | |
8746 | + */ | |
8747 | + for (lnode = disk_list; lnode; lnode = lnode->next) { | |
8748 | + disk = (struct evms_logical_node *) lnode->item; | |
8749 | + /* only really do removable media devices */ | |
8750 | + if (disk->flags & EVMS_MEDIA_CHANGED) { | |
8751 | + /* go revalidate the change media */ | |
8752 | + rc = evms_cs_kernel_ioctl(disk, | |
8753 | + EVMS_REVALIDATE_DISK, | |
8754 | + (unsigned long) NULL); | |
8755 | + if (rc) { | |
8756 | + LOG_ERROR | |
8757 | + ("%s: error(%d) attempting to revalidate '%s%s'.\n", | |
8758 | + __FUNCTION__, rc, | |
8759 | + EVMS_DEV_NODE_PATH, disk->name); | |
8760 | + } | |
8761 | + } | |
8762 | + } | |
8763 | + | |
8764 | + /* delete all the affected volumes */ | |
8765 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
8766 | + struct evms_delete_vol_pkt dv; | |
8767 | + | |
8768 | + volume = &evms_logical_volumes[i]; | |
8769 | + if (!volume->node) | |
8770 | + continue; | |
8771 | + /* only proceed on volumes with: | |
8772 | + * changed media, | |
8773 | + * hot-unplugged devices, | |
8774 | + * & partial volumes | |
8775 | + */ | |
8776 | + if (!(volume->flags & | |
8777 | + (EVMS_MEDIA_CHANGED | | |
8778 | + EVMS_VOLUME_PARTIAL | EVMS_DEVICE_UNPLUGGED))) | |
8779 | + continue; | |
8780 | + /* only delete quiesced volumes */ | |
8781 | + if (!volume->quiesced) | |
8782 | + continue; | |
8783 | + /* delete the volume from memory. | |
8784 | + * do a 'soft' delete if volume | |
8785 | + * is mounted, and 'hard' delete | |
8786 | + * if it is not. | |
8787 | + * | |
8788 | + * NOTE: the delete operation will | |
8789 | + * clear the bits in the flags field. | |
8790 | + */ | |
8791 | + dv.command = is_open(i); | |
8792 | + dv.minor = i; | |
8793 | + dv.status = 0; | |
8794 | + rc = evms_delete_volume(volume, &dv); | |
8795 | + } | |
8796 | + | |
8797 | + /* at this point all devices indicating | |
8798 | + * media change that had volumes on them | |
8799 | + * should be gone. however, we could still | |
8800 | + * have devices indicating media change | |
8801 | + * that had no volumes on them in the disk | |
8802 | + * list. we need to delete these devices | |
8803 | + * from kernel memory and the global device | |
8804 | + * list. | |
8805 | + */ | |
8806 | + for (lnode = evms_global_device_list; lnode; lnode = next_lnode) { | |
8807 | + next_lnode = lnode->next; | |
8808 | + | |
8809 | + disk = (struct evms_logical_node *) lnode->item; | |
8810 | + if (disk->flags & EVMS_MEDIA_CHANGED) { | |
8811 | + rc = DELETE(disk); | |
8812 | + } | |
8813 | + } | |
8814 | + | |
8815 | + /* all the devices that indicated media | |
8816 | + * change should be gone, both from kernel | |
8817 | + * memory and global device list. we now | |
8818 | + * need to remove any references to these | |
8819 | + * devices from the disk list. | |
8820 | + * | |
8821 | + * when removable media is installed, it | |
8822 | + * will get detected in the device manager's | |
8823 | + * rediscovery as a new device and added to | |
8824 | + * the discover list. | |
8825 | + */ | |
8826 | + for (lnode = disk_list; lnode; lnode = next_lnode) { | |
8827 | + struct evms_list_node *glnode; | |
8828 | + int lnode_still_there; | |
8829 | + | |
8830 | + next_lnode = lnode->next; | |
8831 | + | |
8832 | + lnode_still_there = FALSE; | |
8833 | + for (glnode = evms_global_device_list; | |
8834 | + glnode; glnode = glnode->next) { | |
8835 | + if (glnode->item == lnode->item) { | |
8836 | + lnode_still_there = TRUE; | |
8837 | + break; | |
8838 | + } | |
8839 | + } | |
8840 | + if (lnode_still_there == FALSE) { | |
8841 | + rc = evms_cs_remove_item_from_list(&disk_list, | |
8842 | + lnode->item); | |
8843 | + if (rc) { | |
8844 | + LOG_ERROR | |
8845 | + ("%s: error(%d) attempting to remove item(%p) from disk_list(%p).\n", | |
8846 | + __FUNCTION__, rc, lnode->item, | |
8847 | + &disk_list); | |
8848 | + } | |
8849 | + } | |
8850 | + } | |
8851 | + | |
8852 | + /* build the in-kernel rediscover packet */ | |
8853 | + | |
8854 | + /* allocate the space for the drive_array in | |
8855 | + * the struct evms_rediscover_pkt packet. to do this | |
8856 | + * we need to count the number of disk nodes, | |
8857 | + * then allocate the necessary space. | |
8858 | + */ | |
8859 | + /* count the disk nodes */ | |
8860 | + for (lnode = disk_list; lnode; lnode = lnode->next) | |
8861 | + kernel_rd_pckt.drive_count++; | |
8862 | + /* allocate the space */ | |
8863 | + if (kernel_rd_pckt.drive_count) { | |
8864 | + kernel_rd_pckt.drive_array = | |
8865 | + kmalloc(kernel_rd_pckt.drive_count * | |
8866 | + sizeof (u64), GFP_KERNEL); | |
8867 | + if (!kernel_rd_pckt.drive_array) { | |
8868 | + rc = -ENOMEM; | |
8869 | + LOG_ERROR | |
8870 | + ("%s: error(%d) allocating rediscover drive array.\n", | |
8871 | + __FUNCTION__, rc); | |
8872 | + } | |
8873 | + } | |
8874 | + /* populate the drive array | |
8875 | + * | |
8876 | + * this also frees the disk_list, which must happen | |
8877 | + * even if allocating the drive array failed. | |
8878 | + */ | |
8879 | + for (i = 0, lnode = disk_list; lnode; lnode = next_lnode, i++) { | |
8880 | + next_lnode = lnode->next; | |
8881 | + /* remove this disk from the disk list */ | |
8882 | + disk = (struct evms_logical_node *) lnode->item; | |
8883 | + rc = evms_cs_remove_item_from_list(&disk_list, disk); | |
8884 | + /* add this disk to the rediscover packet, but never | |
8885 | + * store through a drive array that failed to allocate. | |
8886 | + */ | |
8887 | + if (!rc && kernel_rd_pckt.drive_array) | |
8888 | + kernel_rd_pckt.drive_array[i] = | |
8889 | + NODE_TO_DEV_HANDLE(disk); | |
8890 | + } | |
8891 | + /* a successful removal must not hide the allocation failure */ | |
8892 | + if (kernel_rd_pckt.drive_count && !kernel_rd_pckt.drive_array) | |
8893 | + rc = -ENOMEM; | |
8894 | + /* perform the rediscovery operation */ | |
8895 | + if (!rc) { | |
8896 | + static int evms_discover_volumes(struct evms_rediscover_pkt *); | |
8897 | + rc = evms_discover_volumes(&kernel_rd_pckt); | |
8898 | + } | |
8899 | + /* free the drive array even when rediscovery was skipped */ | |
8900 | + if (kernel_rd_pckt.drive_array) | |
8901 | + kfree(kernel_rd_pckt.drive_array); | |
8902 | + LOG_DETAILS("%s: rediscover completed.\n", __FUNCTION__); | |
8903 | + } | |
8904 | + | |
8905 | + return (rc); | |
8906 | +} | |
8907 | + | |
8908 | +/************************************************/ | |
8909 | +/* START -- REVALIDATE DISK */ | |
8910 | +/************************************************/ | |
8911 | + | |
8912 | +static int | |
8913 | +evms_revalidate_disk(kdev_t dev) | |
8914 | +{ | |
8915 | + struct evms_logical_volume *vol; | |
8916 | + int result; | |
8917 | + /* revalidation is limited to privileged callers */ | |
8918 | + if (!capable(CAP_SYS_ADMIN)) { | |
8919 | + return -EACCES; | |
8920 | + } | |
8921 | + | |
8922 | + /* the device's minor number selects the entry in the | |
8923 | + * logical volume table; an entry with no node is not | |
8924 | + * a valid volume. | |
8925 | + */ | |
8926 | + vol = &evms_logical_volumes[MINOR(dev)]; | |
8927 | + if (vol->node == NULL) { | |
8928 | + return -ENXIO; | |
8929 | + } | |
8930 | + | |
8931 | + /* go revalidate the changed media: the request is handed | |
8932 | + * to the volume's node and its result returned as is. | |
8933 | + */ | |
8934 | + result = evms_cs_kernel_ioctl(vol->node, | |
8935 | + EVMS_REVALIDATE_DISK, | |
8936 | + (unsigned long) NULL); | |
8937 | + return result; | |
8938 | +} | |
8939 | + | |
8940 | +/************************************************/ | |
8941 | +/* END -- REVALIDATE DISK */ | |
8942 | +/************************************************/ | |
8943 | + | |
8944 | +/************************************************/ | |
8945 | +/* START -- OPEN */ | |
8946 | +/************************************************/ | |
8947 | + | |
8948 | +static int | |
8949 | +evms_open(struct inode *inode, struct file *file) | |
8950 | +{ | |
8951 | + int rc = 0, minor = 0; | |
8952 | + struct evms_logical_volume *volume = NULL; | |
8953 | + /* open handler: returns 0 on success, else the first error hit below */ | |
8954 | + /* check user access */ | |
8955 | + if (!capable(CAP_SYS_ADMIN)) | |
8956 | + rc = -EACCES; | |
8957 | + if (!rc && !inode) | |
8958 | + rc = -EINVAL; | |
8959 | + /* rescan devices only after the checks above passed, so their result is kept */ | |
8960 | + if (!rc) | |
8961 | + rc = evms_check_for_device_changes(inode, file); | |
8962 | + if (!rc) { | |
8963 | + /* get the minor */ | |
8964 | + minor = MINOR(inode->i_rdev); | |
8965 | + if (minor) { | |
8966 | + /* ensure this minor points to a valid volume */ | |
8967 | + volume = &evms_logical_volumes[minor]; | |
8968 | + if (volume->node == NULL) { | |
8969 | + rc = -ENXIO; | |
8970 | + } | |
8971 | + } | |
8972 | + } | |
8973 | + /* go "open" the volume; the open count is rolled back if the ioctl fails */ | |
8974 | + if (!rc && minor) { | |
8975 | + atomic_inc(&volume->opens); | |
8976 | + rc = IOCTL(volume->node, inode, file, | |
8977 | + EVMS_OPEN_VOLUME, (unsigned long) NULL); | |
8978 | + if (rc) { | |
8979 | + LOG_ERROR | |
8980 | + ("error(%d) doing EVMS_OPEN_VOLUME ioctl to '%s'.\n", | |
8981 | + rc, volume->name); | |
8982 | + atomic_dec(&volume->opens); | |
8983 | + } | |
8984 | + } | |
8985 | + return (rc); | |
8986 | +} | |
8987 | + | |
8988 | +/************************************************/ | |
8989 | +/* END -- OPEN */ | |
8990 | +/************************************************/ | |
8991 | + | |
8992 | +/************************************************/ | |
8993 | +/* START -- RELEASE */ | |
8994 | +/************************************************/ | |
8995 | + | |
8996 | +static int | |
8997 | +evms_release(struct inode *inode, struct file *file) | |
8998 | +{ | |
8999 | + struct evms_logical_volume *vol; | |
9000 | + int minor_nr, result; | |
9001 | + | |
9002 | + if (!inode) { | |
9003 | + return -EINVAL; | |
9004 | + } | |
9005 | + /* minor 0 is never looked up in the volume table; | |
9006 | + * releasing it succeeds without further work. | |
9007 | + */ | |
9008 | + minor_nr = MINOR(inode->i_rdev); | |
9009 | + if (!minor_nr) { | |
9010 | + return 0; | |
9011 | + } | |
9012 | + /* a table entry with no node is not a valid volume */ | |
9013 | + vol = &evms_logical_volumes[minor_nr]; | |
9014 | + if (vol->node == NULL) { | |
9015 | + return -ENXIO; | |
9016 | + } | |
9017 | + /* go "close" the volume; its open count is dropped | |
9018 | + * only when the close ioctl reports success. | |
9019 | + */ | |
9020 | + result = IOCTL(vol->node, inode, file, EVMS_CLOSE_VOLUME, (unsigned long) NULL); | |
9021 | + if (result) { | |
9022 | + LOG_ERROR("error(%d) doing EVMS_CLOSE_VOLUME ioctl to '%s'.\n", | |
9023 | + result, vol->name); | |
9024 | + return result; | |
9025 | + } | |
9026 | + atomic_dec(&vol->opens); | |
9027 | + return 0; | |
9028 | +} | |
9029 | + | |
9030 | +/************************************************/ | |
9031 | +/* END -- RELEASE */ | |
9032 | +/************************************************/ | |
9033 | +/* block device operations table: wires up the EVMS open, release, ioctl, media-change and revalidate handlers */ | |
9034 | +static struct block_device_operations evms_fops = { | |
9035 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,14) | |
9036 | + owner:THIS_MODULE, /* only set when built for kernel 2.4.14 or later */ | |
9037 | +#endif | |
9038 | + open:evms_open, | |
9039 | + release:evms_release, | |
9040 | + ioctl:evms_ioctl, | |
9041 | + check_media_change:evms_check_media_change, | |
9042 | + revalidate:evms_revalidate_disk | |
9043 | +}; | |
9044 | + | |
9045 | +/**********************************************************/ | |
9046 | +/* END -- FOPS functions definitions */ | |
9047 | +/**********************************************************/ | |
9048 | + | |
9049 | +/**********************************************************/ | |
9050 | +/* START -- RUNTIME support functions */ | |
9051 | +/**********************************************************/ | |
9052 | +/* request function that only logs a warning; per its own message it is not expected to be called */ | |
9053 | +static void | |
9054 | +evms_do_request_fn(request_queue_t * q) | |
9055 | +{ | |
9056 | + LOG_WARNING("This function should not be called.\n"); | |
9057 | +} | |
9058 | + | |
9059 | +#ifdef CONFIG_SMP | |
9060 | +static request_queue_t * | |
9061 | +evms_find_queue(kdev_t dev) | |
9062 | +{ | |
9063 | + /* map the device's minor number to its volume's own request | |
9064 | + * queue; a volume entry without a node has no queue (NULL). */ | |
9065 | + struct evms_logical_volume *vol = &evms_logical_volumes[MINOR(dev)]; | |
9066 | + | |
9067 | + if (!vol->node) | |
9068 | + return NULL; | |
9069 | + return &vol->request_queue; | |
9070 | +} | |
9071 | +#endif | |
9072 | + | |
9073 | +/* | |
9074 | + * Function: evms_make_request_fn | |
9075 | + * Description: Pass a buffer head to the node of the logical volume it | |
9076 | + * targets, once the volume is not quiesced. Always returns 0. | |
9077 | + */ | |
9078 | +static int | |
9079 | +evms_make_request_fn(request_queue_t * q, int rw, struct buffer_head *bh) | |
9080 | +{ | |
9081 | + int minor_nr = MINOR(bh->b_rdev); | |
9082 | + struct evms_logical_volume *vol = &evms_logical_volumes[minor_nr]; | |
9083 | + | |
9084 | + /* block while the volume is quiesced */ | |
9085 | + wait_event(vol->wait_queue, (!vol->quiesced)); | |
9086 | + if (!vol->node) { | |
9087 | + LOG_ERROR("request for unknown logical volume [minor(%d)].\n", minor_nr); | |
9088 | + buffer_IO_error(bh); | |
9089 | + return 0; | |
9090 | + } | |
9091 | + | |
9092 | + /* only reads (READ, READA) and writes are serviced */ | |
9093 | + if (rw != READ && rw != READA && rw != WRITE) { | |
9094 | + buffer_IO_error(bh); | |
9095 | + return 0; | |
9096 | + } | |
9097 | + | |
9098 | + /* the request counts as in progress while the node handles it */ | |
9099 | + atomic_inc(&vol->requests_in_progress); | |
9100 | + if (rw == WRITE) { | |
9101 | + W_IO(vol->node, bh); | |
9102 | + } else { | |
9103 | + R_IO(vol->node, bh); | |
9104 | + } | |
9105 | + atomic_dec(&vol->requests_in_progress); | |
9106 | + return 0; | |
9107 | +} | |
9108 | + | |
9109 | +/**********************************************************/ | |
9110 | +/* END -- RUNTIME support functions */ | |
9111 | +/**********************************************************/ | |
9112 | + | |
9113 | +/**********************************************************/ | |
9114 | +/* START -- INIT/DISCOVERY support functions */ | |
9115 | +/**********************************************************/ | |
9116 | + | |
9117 | +#ifdef LOCAL_DEBUG | |
9118 | +static void | |
9119 | +display_discover_list(struct evms_logical_node *discover_list, char *text) | |
9120 | +{ | |
9121 | + struct evms_logical_node *node; | |
9122 | + /* debug dump: log the fields of every node on discover_list, tagged with text */ | |
9123 | + LOG_DETAILS("discover list:(%s)\n", text); | |
9124 | + for (node = discover_list; node; node = node->next) { | |
9125 | + LOG_DETAILS("\nnode info:\n"); | |
9126 | + LOG_DETAILS("node.....................(0x%p)\n", node); | |
9127 | + LOG_DETAILS("name.....................(%s)\n", node->name); | |
9128 | + LOG_DETAILS("plugin id................(0x%x)\n", | |
9129 | + node->plugin->id); | |
9130 | + LOG_DETAILS("size.....................("PFU64")\n", | |
9131 | + node->total_vsectors); | |
9132 | + LOG_DETAILS("flags....................(0x%x)\n", node->flags); | |
9133 | + LOG_DETAILS("iflags...................(0x%x)\n", node->iflags); | |
9134 | + LOG_DETAILS("sector size..............(%d)\n", | |
9135 | + node->hardsector_size); | |
9136 | + LOG_DETAILS("block size...............(%d)\n", | |
9137 | + node->block_size); | |
9138 | + LOG_DETAILS("sys id...................(0x%x)\n", | |
9139 | + node->system_id); | |
9140 | + /* the feature header is dumped only when the node has one attached */ | |
9141 | + if (node->feature_header) { | |
9142 | + struct evms_feature_header *fh; | |
9143 | + | |
9144 | + fh = node->feature_header; | |
9145 | + LOG_DETAILS("\nfeature header:\n"); | |
9146 | + LOG_DETAILS("signature................(0x%x)\n", | |
9147 | + fh->signature); | |
9148 | + LOG_DETAILS("crc......................(0x%x)\n", | |
9149 | + fh->crc); | |
9150 | + LOG_DETAILS("feature header version...(%d.%d.%d)\n", | |
9151 | + fh->version.major, fh->version.minor, | |
9152 | + fh->version.patchlevel); | |
9153 | + LOG_DETAILS("engine version...........(%d.%d.%d)\n", | |
9154 | + fh->engine_version.major, | |
9155 | + fh->engine_version.minor, | |
9156 | + fh->engine_version.patchlevel); | |
9157 | + LOG_DETAILS("flags....................(0x%x)\n", | |
9158 | + fh->flags); | |
9159 | + LOG_DETAILS("feature id...............(0x%x)\n", | |
9160 | + fh->feature_id); | |
9161 | + LOG_DETAILS("sequence#................("PFU64")\n", | |
9162 | + fh->sequence_number); | |
9163 | + LOG_DETAILS("alignment padding........("PFU64")\n", | |
9164 | + fh->alignment_padding); | |
9165 | + LOG_DETAILS("feature data1 lsn........("PFU64")\n", | |
9166 | + fh->feature_data1_start_lsn); | |
9167 | + LOG_DETAILS("feature data1 size.......("PFU64")\n", | |
9168 | + fh->feature_data1_size); | |
9169 | + LOG_DETAILS("feature data2 lsn........("PFU64")\n", | |
9170 | + fh->feature_data2_start_lsn); | |
9171 | + LOG_DETAILS("feature data2 size.......("PFU64")\n", | |
9172 | + fh->feature_data2_size); | |
9173 | + LOG_DETAILS("volume sn................("PFU64")\n", | |
9174 | + fh->volume_serial_number); | |
9175 | + LOG_DETAILS("volume minor#............(%d)\n", | |
9176 | + fh->volume_system_id); | |
9177 | + LOG_DETAILS("object depth.............(%d)\n", | |
9178 | + fh->object_depth); | |
9179 | + LOG_DETAILS("object name..............(%s)\n", | |
9180 | + fh->object_name); | |
9181 | + LOG_DETAILS("volume name..............(%s)\n", | |
9182 | + fh->volume_name); | |
9183 | + } | |
9184 | + /* likewise, volume info is dumped only when present */ | |
9185 | + if (node->volume_info) { | |
9186 | + struct evms_volume_info *vi; | |
9187 | + | |
9188 | + vi = node->volume_info; | |
9189 | + LOG_DETAILS("\nvolume info:\n"); | |
9190 | + LOG_DETAILS("volume name..............(%s)\n", | |
9191 | + vi->volume_name); | |
9192 | + LOG_DETAILS("volume sn................("PFU64")\n", | |
9193 | + vi->volume_sn); | |
9194 | + LOG_DETAILS("volume minor#............(%d)\n", | |
9195 | + vi->volume_minor); | |
9196 | + } | |
9197 | + } | |
9198 | + if (discover_list) { | |
9199 | + LOG_DETAILS("\n"); | |
9200 | + } | |
9201 | +} | |
9202 | +#endif | |
9203 | + | |
9204 | +/* | |
9205 | + * Function: evms_discover_logical_disks | |
9206 | + * Description: Invoke DISCOVER on every registered device manager plugin, passing it disk_list. | |
9207 | + */ | |
9208 | +static void | |
9209 | +evms_discover_logical_disks(struct evms_logical_node **disk_list) | |
9210 | +{ | |
9211 | + struct evms_registered_plugin *reg; | |
9212 | + LOG_EXTRA("discovering logical disks...\n"); | |
9213 | + for (reg = registered_plugin_head; reg; reg = reg->next) { | |
9214 | + if (GetPluginType(reg->plugin->id) != EVMS_DEVICE_MANAGER) | |
9215 | + continue; | |
9216 | + DISCOVER(reg, disk_list); | |
9217 | + } | |
9218 | +} | |
9219 | + | |
9220 | +/* | |
9221 | + * Function: evms_discover_logical_partitions | |
9222 | + * Description: Construct the logical partition list by calling all registered partition managers. | |
9223 | + */ | |
9224 | +static void | |
9225 | +evms_discover_logical_partitions(struct evms_logical_node **discover_list) | |
9226 | +{ | |
9227 | + struct evms_registered_plugin *reg; | |
9228 | + int rc, list_grew; | |
9229 | + | |
9230 | + LOG_EXTRA("discovering logical partitions...\n"); | |
9231 | + | |
9232 | + /* Keep making passes over the segment managers until a | |
9233 | + * complete pass adds nothing to the discover list. | |
9234 | + * | |
9235 | + * DISCOVER return codes: | |
9236 | + * RC > 0 the plugin added something to the discover | |
9237 | + * list, so another pass is required. | |
9238 | + * RC == 0 the plugin added nothing. | |
9239 | + * RC < 0 the plugin hit an error and added nothing. | |
9240 | + * NOTE: a plugin that both added something new and | |
9241 | + * encountered an error must return RC > 0. | |
9242 | + */ | |
9243 | + do { | |
9244 | + list_grew = FALSE; | |
9245 | + for (reg = registered_plugin_head; reg; reg = reg->next) { | |
9246 | + if (GetPluginType(reg->plugin->id) != EVMS_SEGMENT_MANAGER) { | |
9247 | + continue; | |
9248 | + } | |
9249 | + rc = DISCOVER(reg, discover_list); | |
9250 | + if (rc > 0) { | |
9251 | + list_grew = TRUE; | |
9252 | + } | |
9253 | + } | |
9254 | + } while (list_grew != FALSE); | |
9255 | + | |
9256 | + /* send the end of discovery signal to each partition | |
9257 | + * manager plugin that provides an end_discover handler. | |
9258 | + */ | |
9259 | + for (reg = registered_plugin_head; reg; reg = reg->next) { | |
9260 | + if (GetPluginType(reg->plugin->id) != EVMS_SEGMENT_MANAGER) { | |
9261 | + continue; | |
9262 | + } | |
9263 | + if (reg->plugin->fops->end_discover) { | |
9264 | + rc = END_DISCOVER(reg, discover_list); | |
9265 | + } | |
9266 | + } | |
9267 | +} | |
9268 | + | |
9269 | +/* | |
9270 | + * Function: evms_discover_volume_groups | |
9271 | + * Description: Find volume groups within the logical partitions list | |
9272 | + */ | |
9273 | +static void | |
9274 | +evms_discover_volume_groups(struct evms_logical_node **discover_list) | |
9275 | +{ | |
9276 | + struct evms_registered_plugin *reg; | |
9277 | + int rc, list_grew; | |
9278 | + | |
9279 | + LOG_EXTRA("discovering logical volume groups...\n"); | |
9280 | + | |
9281 | + /* Keep making passes over the region managers until a | |
9282 | + * complete pass adds nothing to the discover list. | |
9283 | + * | |
9284 | + * DISCOVER return codes: | |
9285 | + * RC > 0 the plugin added something to the discover | |
9286 | + * list, so another pass is required. | |
9287 | + * RC == 0 the plugin added nothing. | |
9288 | + * RC < 0 the plugin hit an error and added nothing. | |
9289 | + * NOTE: added something AND hit an error => RC > 0. | |
9290 | + */ | |
9291 | + do { | |
9292 | + list_grew = FALSE; | |
9293 | + for (reg = registered_plugin_head; reg; reg = reg->next) { | |
9294 | + if (GetPluginType(reg->plugin->id) != EVMS_REGION_MANAGER) { | |
9295 | + continue; | |
9296 | + } | |
9297 | + rc = DISCOVER(reg, discover_list); | |
9298 | + if (rc > 0) { | |
9299 | + list_grew = TRUE; | |
9300 | + } | |
9301 | + } | |
9302 | + } while (list_grew != FALSE); | |
9303 | + | |
9304 | + /* send the end of discovery signal to each volume group | |
9305 | + * plugin that provides an end_discover handler. | |
9306 | + */ | |
9307 | + for (reg = registered_plugin_head; reg; reg = reg->next) { | |
9308 | + if (GetPluginType(reg->plugin->id) != EVMS_REGION_MANAGER) { | |
9309 | + continue; | |
9310 | + } | |
9311 | + if (reg->plugin->fops->end_discover) { | |
9312 | + rc = END_DISCOVER(reg, discover_list); | |
9313 | + } | |
9314 | + } | |
9315 | +} | |
9316 | + | |
9317 | +/* | |
9318 | + * Function: le_feature_header_to_cpu | |
9319 | + * Description: convert the feature header fields, in place, into cpu | |
9320 | + * native format from the on-disk Little Endian format. From this point | |
9321 | + * forward all plugins can deal with feature headers natively. | |
9322 | + */ | |
9323 | +void | |
9324 | +le_feature_header_to_cpu(struct evms_feature_header *fh) | |
9325 | +{ | |
9326 | + /* fields converted with le32_to_cpup */ | |
9327 | + fh->signature = le32_to_cpup(&fh->signature); | |
9328 | + fh->crc = le32_to_cpup(&fh->crc); | |
9329 | + fh->version.major = le32_to_cpup(&fh->version.major); | |
9330 | + fh->version.minor = le32_to_cpup(&fh->version.minor); | |
9331 | + fh->version.patchlevel = le32_to_cpup(&fh->version.patchlevel); | |
9332 | + fh->engine_version.major = le32_to_cpup(&fh->engine_version.major); | |
9333 | + fh->engine_version.minor = le32_to_cpup(&fh->engine_version.minor); | |
9334 | + fh->engine_version.patchlevel = le32_to_cpup(&fh->engine_version.patchlevel); | |
9335 | + fh->flags = le32_to_cpup(&fh->flags); | |
9336 | + fh->feature_id = le32_to_cpup(&fh->feature_id); | |
9337 | + fh->volume_system_id = le32_to_cpup(&fh->volume_system_id); | |
9338 | + fh->object_depth = le32_to_cpup(&fh->object_depth); | |
9339 | + | |
9340 | + /* fields converted with le64_to_cpup */ | |
9341 | + fh->sequence_number = le64_to_cpup(&fh->sequence_number); | |
9342 | + fh->alignment_padding = le64_to_cpup(&fh->alignment_padding); | |
9343 | + fh->feature_data1_start_lsn = le64_to_cpup(&fh->feature_data1_start_lsn); | |
9344 | + fh->feature_data1_size = le64_to_cpup(&fh->feature_data1_size); | |
9345 | + fh->feature_data2_start_lsn = le64_to_cpup(&fh->feature_data2_start_lsn); | |
9346 | + fh->feature_data2_size = le64_to_cpup(&fh->feature_data2_size); | |
9347 | + fh->volume_serial_number = le64_to_cpup(&fh->volume_serial_number); | |
9348 | +} | |
9349 | + | |
9350 | +/* | |
9351 | + * edef_load_feature_header | |
9352 | + * | |
9353 | + * Probes 'node' for an EVMS feature header unless one is already | |
9354 | + * attached. Two copies are read: the primary copy from the last | |
9355 | + * sectors of the node, and the secondary copy starting one sector | |
9356 | + * in front of the primary. Each copy is checked for signature, CRC | |
9357 | + * and version. When both copies are valid the one with the higher | |
9358 | + * sequence number wins. The chosen copy is left, in cpu native | |
9359 | + * format, in node->feature_header; the other buffer is freed. | |
9360 | + * | |
9361 | + * Returns 0 when a header was loaded or when no header signature | |
9362 | + * was found, -ENOMEM when a probe buffer cannot be allocated, and | |
9363 | + * a nonzero probe/validation error code otherwise. | |
9364 | + */ | |
9365 | +static int | |
9366 | +edef_load_feature_header(struct evms_logical_node *node) | |
9367 | +{ | |
9368 | +	int i, rc = 0, rc_array[2] = { 0, 0 }; | |
9369 | +	unsigned long size_in_bytes; | |
9370 | +	u64 size_in_sectors, starting_sector = 0; | |
9371 | +	struct evms_feature_header *fh = NULL, *fh1, *fh2; | |
9372 | +	char *location_name = NULL; | |
9373 | +	struct evms_version version = { | |
9374 | +		EVMS_FEATURE_HEADER_MAJOR, | |
9375 | +		EVMS_FEATURE_HEADER_MINOR, | |
9376 | +		EVMS_FEATURE_HEADER_PATCHLEVEL | |
9377 | +	}; | |
9378 | + | |
9379 | +	/* a header is already attached: nothing to load */ | |
9380 | +	if (node->feature_header) | |
9381 | +		return (0); | |
9382 | + | |
9383 | +	/* Allocate one buffer per copy. Bail out right away when | |
9384 | +	 * either allocation fails: the probe loop below reads into | |
9385 | +	 * both buffers and must never see a NULL or freed one. | |
9386 | +	 */ | |
9387 | +	size_in_sectors = evms_cs_size_in_vsectors(sizeof (*fh)); | |
9388 | +	size_in_bytes = size_in_sectors << EVMS_VSECTOR_SIZE_SHIFT; | |
9389 | +	fh1 = kmalloc(size_in_bytes, GFP_KERNEL); | |
9390 | +	if (!fh1) | |
9391 | +		return (-ENOMEM); | |
9392 | +	fh2 = kmalloc(size_in_bytes, GFP_KERNEL); | |
9393 | +	if (!fh2) { | |
9394 | +		kfree(fh1); | |
9395 | +		return (-ENOMEM); | |
9396 | +	} | |
9397 | + | |
9398 | +	for (i = 0; i < 2; i++) { | |
9399 | +		if (i == 0) { | |
9400 | +			/* primary copy: last sectors of the node */ | |
9401 | +			starting_sector = | |
9402 | +			    node->total_vsectors - size_in_sectors; | |
9403 | +			fh = fh1; | |
9404 | +			location_name = evms_primary_string; | |
9405 | +		} else { | |
9406 | +			/* secondary copy: starts one sector earlier */ | |
9407 | +			starting_sector--; | |
9408 | +			fh = fh2; | |
9409 | +			location_name = evms_secondary_string; | |
9410 | +		} | |
9411 | +		/* read header into buffer */ | |
9412 | +		rc = INIT_IO(node, 0, starting_sector, size_in_sectors, fh); | |
9413 | +		if (rc) { | |
9414 | +			LOG_ERROR | |
9415 | +			    ("error(%d) probing for %s feature header(at "PFU64") on '%s'.\n", | |
9416 | +			     rc, location_name, starting_sector, | |
9417 | +			     node->name); | |
9418 | +			rc_array[i] = rc; | |
9419 | +			continue; | |
9420 | +		} | |
9421 | +		/* validate header signature (header is still in on-disk format) */ | |
9422 | +		if (cpu_to_le32(fh->signature) != | |
9423 | +		    EVMS_FEATURE_HEADER_SIGNATURE) { | |
9424 | +			rc = -ENODATA; | |
9425 | +			rc_array[i] = rc; | |
9426 | +			continue; | |
9427 | +		} | |
9428 | +		/* validate header CRC */ | |
9429 | +		if (fh->crc != EVMS_MAGIC_CRC) { | |
9430 | +			u32 org_crc, final_crc; | |
9431 | + | |
9432 | +			/* the CRC is computed with the crc field zeroed */ | |
9433 | +			org_crc = cpu_to_le32(fh->crc); | |
9434 | +			fh->crc = 0; | |
9435 | +			final_crc = | |
9436 | +			    evms_cs_calculate_crc(EVMS_INITIAL_CRC, fh, | |
9437 | +						  sizeof (*fh)); | |
9438 | +			if (final_crc != org_crc) { | |
9439 | +				LOG_ERROR | |
9440 | +				    ("CRC mismatch error [stored(%x), computed(%x)] in %s feature header(at "PFU64") on '%s'.\n", | |
9441 | +				     org_crc, final_crc, location_name, | |
9442 | +				     starting_sector, node->name); | |
9443 | +				rc = -EINVAL; | |
9444 | +				rc_array[i] = rc; | |
9445 | +				continue; | |
9446 | +			} | |
9447 | +		} else { | |
9448 | +			LOG_WARNING | |
9449 | +			    ("CRC disabled in %s feature header(at "PFU64") on '%s'.\n", | |
9450 | +			     location_name, starting_sector, | |
9451 | +			     node->name); | |
9452 | +		} | |
9453 | +		/* convert from on-disk (Little Endian) to native cpu format */ | |
9454 | +		le_feature_header_to_cpu(fh); | |
9455 | +		/* verify the system data version */ | |
9456 | +		rc = evms_cs_check_version(&version, &fh->version); | |
9457 | +		if (rc) { | |
9458 | +			LOG_ERROR | |
9459 | +			    ("error: obsolete version(%d,%d,%d) in %s feature header on '%s'.\n", | |
9460 | +			     fh->version.major, fh->version.minor, | |
9461 | +			     fh->version.patchlevel, location_name, | |
9462 | +			     node->name); | |
9463 | +			rc_array[i] = rc; | |
9464 | +		} | |
9465 | +	} | |
9466 | + | |
9467 | +	/* getting same return code for both copies? */ | |
9468 | +	if (rc_array[0] == rc_array[1]) { | |
9469 | +		rc = rc_array[0]; | |
9470 | +		/* both copies valid: use the highest sequence number */ | |
9471 | +		if (!rc) { | |
9472 | +			if (fh1->sequence_number == fh2->sequence_number) { | |
9473 | +				fh = fh1; | |
9474 | +			} else { | |
9475 | +				LOG_WARNING | |
9476 | +				    ("%s feature header sequence number("PFU64") mismatches %s feature header sequence number("PFU64") on '%s'!\n", | |
9477 | +				     evms_primary_string, fh1->sequence_number, | |
9478 | +				     evms_secondary_string, fh2->sequence_number, node->name); | |
9479 | +				if (fh1->sequence_number > fh2->sequence_number) { | |
9480 | +					fh = fh1; | |
9481 | +					location_name = evms_primary_string; | |
9482 | +					/* indicate bad sequence number of secondary */ | |
9483 | +					rc_array[1] = -1; | |
9484 | +				} else { | |
9485 | +					fh = fh2; | |
9486 | +					location_name = evms_secondary_string; | |
9487 | +					/* indicate bad sequence number of primary */ | |
9488 | +					rc_array[0] = -1; | |
9489 | +				} | |
9490 | +			} | |
9491 | +		} | |
9492 | +	} else if ((rc_array[0] == 0) || (rc_array[1] == 0)) { | |
9493 | +		/* different return codes, but one copy is valid: | |
9494 | +		 * use the valid copy and warn about the other one. */ | |
9495 | +		char *warn_name; | |
9496 | + | |
9497 | +		rc = 0; | |
9498 | +		if (rc_array[0] == 0) { | |
9499 | +			/* use primary (rear) copy if its good */ | |
9500 | +			fh = fh1; | |
9501 | +			location_name = evms_primary_string; | |
9502 | +			warn_name = evms_secondary_string; | |
9503 | +		} else { | |
9504 | +			/* use secondary (front) copy if its good */ | |
9505 | +			fh = fh2; | |
9506 | +			location_name = evms_secondary_string; | |
9507 | +			warn_name = evms_primary_string; | |
9508 | +		} | |
9509 | +		/* one of the two codes is 0, so the sum is the error */ | |
9510 | +		LOG_WARNING | |
9511 | +		    ("warning: error(%d) probing/verifying the %s feature header on '%s'.\n", | |
9512 | +		     rc_array[0] + rc_array[1], warn_name, node->name); | |
9513 | +	} else if ((rc_array[0] == -EINVAL) || (rc_array[1] == -EINVAL)) { | |
9514 | +		/* both copies had a different error, | |
9515 | +		 * and one was a fatal error, so | |
9516 | +		 * indicate fatal error. */ | |
9517 | +		rc = -EINVAL; | |
9518 | +	} | |
9519 | + | |
9520 | +	/* on error, set fh to NULL */ | |
9521 | +	if (rc) | |
9522 | +		fh = NULL; | |
9523 | + | |
9524 | +	/* free whichever buffer is not handed to the node */ | |
9525 | +	if (fh != fh1) | |
9526 | +		kfree(fh1); | |
9527 | +	if (fh != fh2) | |
9528 | +		kfree(fh2); | |
9529 | + | |
9530 | +	/* save validated feature header pointer */ | |
9531 | +	if (!rc) { | |
9532 | +		node->feature_header = fh; | |
9533 | +		if (rc_array[0] != rc_array[1]) { | |
9534 | +			LOG_DETAILS("using %s feature header on '%s'.\n", | |
9535 | +				    location_name, node->name); | |
9536 | +		} | |
9537 | +	} | |
9538 | + | |
9539 | +	/* if no signature found, adjust return code */ | |
9540 | +	if (rc == -ENODATA) { | |
9541 | +		rc = 0; | |
9542 | +		LOG_DEBUG("no feature header found on '%s'.\n", node->name); | |
9543 | +	} | |
9544 | +	return (rc); | |
9545 | +} | |
9546 | + | |
9547 | +/* | |
9548 | + * edef_setup_volume_bottom | |
9549 | + * | |
9550 | + * Helper for edef_find_first_features. Marks 'node', which carries | |
9551 | + * a usable feature header, as the bottom node of an EVMS volume: | |
9552 | + * sets the volume and feature-bottom flags, allocates and fills | |
9553 | + * node->volume_info from the header, and registers the node on | |
9554 | + * evms_global_feature_node_list. | |
9555 | + * | |
9556 | + * Returns 0, -ENOMEM when volume_info cannot be allocated, or the | |
9557 | + * result of evms_cs_add_item_to_list. | |
9558 | + */ | |
9559 | +static int | |
9560 | +edef_setup_volume_bottom(struct evms_logical_node *node) | |
9561 | +{ | |
9562 | +	struct evms_feature_header *hdr = node->feature_header; | |
9563 | + | |
9564 | +	node->flags |= EVMS_VOLUME_FLAG; | |
9565 | +	node->iflags |= EVMS_FEATURE_BOTTOM; | |
9566 | +	node->volume_info = kmalloc(sizeof (struct evms_volume_info), GFP_KERNEL); | |
9567 | +	if (!node->volume_info) | |
9568 | +		return (-ENOMEM); | |
9569 | + | |
9570 | +	/* set up volume info struct */ | |
9571 | +	memset(node->volume_info, 0, sizeof (struct evms_volume_info)); | |
9572 | +	node->volume_info->volume_sn = hdr->volume_serial_number; | |
9573 | +	node->volume_info->volume_minor = hdr->volume_system_id; | |
9574 | +	/* NOTE(review): relies on the on-disk name being NUL terminated and fitting */ | |
9575 | +	strcpy(node->volume_info->volume_name, hdr->volume_name); | |
9576 | + | |
9577 | +	/* register(add) node to the global list. */ | |
9578 | +	return (evms_cs_add_item_to_list(&evms_global_feature_node_list, node)); | |
9579 | +} | |
9580 | + | |
9581 | +/* | |
9582 | + * edef_find_first_features | |
9583 | + * | |
9584 | + * Empties *discover_list and examines every node that was on it: | |
9585 | + *  - a node already on evms_global_feature_node_list is a duplicate | |
9586 | + *    reference and is dropped from the list without being deleted; | |
9587 | + *  - otherwise its feature header is loaded. The node is deleted on | |
9588 | + *    any error and when the header is flagged as object or stop data; | |
9589 | + *  - a node with a usable header is set up as a volume bottom node; | |
9590 | + *  - every surviving node is placed back on *discover_list. | |
9591 | + * | |
9592 | + * Always returns 0. | |
9593 | + */ | |
9594 | +static int | |
9595 | +edef_find_first_features(struct evms_logical_node **discover_list) | |
9596 | +{ | |
9597 | +	struct evms_logical_node *pending, *node; | |
9598 | +	struct evms_list_node **dup; | |
9599 | +	int rc; | |
9600 | + | |
9601 | +	/* work from a private copy of the list head */ | |
9602 | +	pending = *discover_list; | |
9603 | +	*discover_list = NULL; | |
9604 | + | |
9605 | +	while (pending) { | |
9606 | +		node = pending; | |
9607 | +		if (evms_cs_remove_logical_node_from_list(&pending, node)) | |
9608 | +			BUG(); | |
9609 | + | |
9610 | +		/* check for duplicate pointers: | |
9611 | +		 * search for the node in the global list. | |
9612 | +		 */ | |
9613 | +		dup = evms_cs_lookup_item_in_list | |
9614 | +		    (&evms_global_feature_node_list, node); | |
9615 | +		if (*dup) { | |
9616 | +			/* already present: forget this reference */ | |
9617 | +			LOG_DETAILS("deleting duplicate reference to '%s'.\n", | |
9618 | +				    node->name); | |
9619 | +			continue; | |
9620 | +		} | |
9621 | + | |
9622 | +		/* load the feature header if present */ | |
9623 | +		rc = edef_load_feature_header(node); | |
9624 | +		/* The node has no feature header when there was none | |
9625 | +		 * to load, or when reading it failed fatally. In both | |
9626 | +		 * cases rc alone decides what happens to the node. | |
9627 | +		 */ | |
9628 | +		if (node->feature_header) { | |
9629 | +			if (node->feature_header->flags & | |
9630 | +			    EVMS_VOLUME_DATA_OBJECT) { | |
9631 | +				/* check for object flag */ | |
9632 | +				LOG_DEFAULT | |
9633 | +				    ("object detected, deleting '%s'.\n", | |
9634 | +				     node->name); | |
9635 | +				rc = -EINVAL; | |
9636 | +			} else if (node->feature_header->flags & | |
9637 | +				   EVMS_VOLUME_DATA_STOP) { | |
9638 | +				/* check for stop-data flag */ | |
9639 | +				LOG_DEFAULT | |
9640 | +				    ("stop data detected, deleting '%s'.\n", | |
9641 | +				     node->name); | |
9642 | +				rc = -EINVAL; | |
9643 | +			} else { | |
9644 | +				/* we have a valid feature header */ | |
9645 | +				rc = edef_setup_volume_bottom(node); | |
9646 | +			} | |
9647 | +		} | |
9648 | +		/* delete the node on any error, otherwise place it | |
9649 | +		 * back on the discover list. */ | |
9650 | +		if (rc) { | |
9651 | +			DELETE(node); | |
9652 | +		} else { | |
9653 | +			evms_cs_add_logical_node_to_list(discover_list, node); | |
9654 | +		} | |
9655 | +	} | |
9656 | +	return (0); | |
9657 | +} | |
9658 | + | |
9659 | +/* These defines describe the node types that can be isolated. */ | |
9660 | +#define ISOLATE_ASSOCIATIVE_FEATURES 0 | |
9661 | +#define ISOLATE_COMPATIBILITY_VOLUMES 1 | |
9662 | +#define ISOLATE_EVMS_VOLUMES 2 | |
9663 | +#define ISOLATE_EVMS_VOLUME_SERIAL_NUMBER 3 | |
9664 | +#define ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH 4 | |
9665 | +/* Moves every node on *src_list that matches 'type' (using compare32 | |
9666 | + * and/or compare64 where the type needs them) onto *trg_list. | |
9667 | + * Returns 0, or the first error reported by the list services. | |
9668 | + */ | |
9669 | +static int | |
9670 | +edef_isolate_nodes_by_type(unsigned int type, | |
9671 | +			   struct evms_logical_node **src_list, | |
9672 | +			   struct evms_logical_node **trg_list, | |
9673 | +			   u32 compare32, u64 compare64) | |
9674 | +{ | |
9675 | +	struct evms_logical_node *node, *next_node; | |
9676 | +	struct evms_feature_header *fh; | |
9677 | +	int rc = 0, found_node; | |
9678 | + | |
9679 | +	for (node = *src_list; node; node = next_node) { | |
9680 | +		next_node = node->next; | |
9681 | +		/* always this node's own header (may be NULL), never an earlier node's */ | |
9682 | +		fh = node->feature_header; | |
9683 | +		found_node = FALSE; | |
9684 | +		switch (type) { | |
9685 | +		case ISOLATE_ASSOCIATIVE_FEATURES: | |
9686 | +			if (fh && GetPluginType(fh->feature_id) == | |
9687 | +			    EVMS_ASSOCIATIVE_FEATURE) | |
9688 | +				found_node = TRUE; | |
9689 | +			break; | |
9690 | +		case ISOLATE_COMPATIBILITY_VOLUMES: | |
9691 | +			if (!(node->flags & EVMS_VOLUME_FLAG)) | |
9692 | +				found_node = TRUE; | |
9693 | +			break; | |
9694 | +		case ISOLATE_EVMS_VOLUMES: | |
9695 | +			if (node->flags & EVMS_VOLUME_FLAG) | |
9696 | +				found_node = TRUE; | |
9697 | +			break; | |
9698 | +		case ISOLATE_EVMS_VOLUME_SERIAL_NUMBER:	/* EVMS volumes with same serial # */ | |
9699 | +			if (node->volume_info && | |
9700 | +			    node->volume_info->volume_sn == compare64) | |
9701 | +				found_node = TRUE; | |
9702 | +			break; | |
9703 | +		case ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH: | |
9704 | +			if (fh && fh->object_depth == compare64 && | |
9705 | +			    fh->feature_id == compare32) | |
9706 | +				found_node = TRUE; | |
9707 | +			break; | |
9708 | +		default:	/* unknown type: nothing matches */ | |
9709 | +			break; | |
9710 | +		} | |
9711 | +		if (found_node != TRUE) | |
9712 | +			continue; | |
9713 | +		rc = evms_cs_remove_logical_node_from_list(src_list, node); | |
9714 | +		if (!rc) | |
9715 | +			rc = evms_cs_add_logical_node_to_list(trg_list, node); | |
9716 | +		if (rc) | |
9717 | +			break; | |
9718 | +	} | |
9719 | +	return (rc); | |
9720 | +} | |
9721 | + | |
9722 | +/* Runs DISCOVER of the plugin matching node's feature id; -1 if none. */ | |
9723 | +static int | |
9724 | +edef_apply_feature(struct evms_logical_node *node, | |
9725 | +		   struct evms_logical_node **volume_node_list) | |
9726 | +{ | |
9727 | +	struct evms_registered_plugin *p; | |
9728 | + | |
9729 | +	/* a node without a feature header has no feature to apply */ | |
9730 | +	if (!node->feature_header) | |
9731 | +		return (-1); | |
9732 | +	for (p = registered_plugin_head; p; p = p->next) | |
9733 | +		if (p->plugin->id == node->feature_header->feature_id) | |
9734 | +			return (DISCOVER(p, volume_node_list)); | |
9735 | +	return (-1); | |
9736 | +} | |
9737 | + | |
9738 | +/* Looks up the registered plugin whose id equals 'id'. On a match its | |
9739 | + * header is stored in *header and 0 is returned; otherwise the miss is | |
9740 | + * logged, *header is left untouched and -ENOPKG is returned. | |
9741 | + */ | |
9742 | +static int | |
9743 | +edef_get_feature_plugin_header(u32 id, struct evms_plugin_header **header) | |
9744 | +{ | |
9745 | +	struct evms_registered_plugin *entry = registered_plugin_head; | |
9746 | + | |
9747 | +	while (entry && entry->plugin->id != id) | |
9748 | +		entry = entry->next; | |
9749 | +	if (!entry) { | |
9750 | +		LOG_SERIOUS("no plugin loaded for feature id(0x%x)\n", id); | |
9751 | +		return (-ENOPKG); | |
9752 | +	} | |
9753 | +	*header = entry->plugin; | |
9754 | +	return (0); | |
9755 | +} | |
9756 | + | |
9757 | +typedef struct evms_volume_build_info_s { /* bookkeeping shared by the edef_* volume build routines */ | |
9758 | + int node_count; /* nodes counted on the volume node list */ | |
9759 | + int feature_header_count; /* NOTE(review): no user seen in this part of the file */ | |
9760 | + int feature_count; /* bumped each time a different max depth plugin is recorded */ | |
9761 | + int associative_feature_count; /* headers seen while the recorded plugin is associative */ | |
9762 | + u64 max_depth; /* largest feature header object_depth found */ | |
9763 | + struct evms_plugin_header *plugin; /* plugin header recorded for the max depth feature */ | |
9764 | + struct evms_logical_node *feature_node_list; /* nodes isolated for that plugin and depth */ | |
9765 | +} evms_volume_build_info_t; | |
9766 | + | |
9767 | +/* | |
9768 | + * edef_evaluate_volume_node_list: | |
9769 | + * does: | |
9770 | + * 1) put all nodes from feature list back on volume list | |
9771 | + * 2) loads the node's feature headers (unless volume_complete) | |
9772 | + * 3) counts the node list's entries | |
9773 | + * 4) records the max object depth and the plugin of its feature | |
9774 | + * 5) counts the feature headers for associative features | |
9775 | + * 6) builds the feature node list for that depth and plugin | |
9776 | + * returns 0, the error of a failing helper, or -ENODATA when the | |
9777 | + * max depth feature has no plugin or no node to work on. | |
9778 | + */ | |
9779 | +static int | |
9780 | +edef_evaluate_volume_node_list(struct evms_logical_node **volume_node_list, | |
9781 | +			       evms_volume_build_info_t * vbi, | |
9782 | +			       int volume_complete) | |
9783 | +{ | |
9784 | +	int rc; | |
9785 | +	struct evms_logical_node *node; | |
9786 | + | |
9787 | +	/* restart all counters for this evaluation */ | |
9788 | +	vbi->node_count = 0; | |
9789 | +	vbi->feature_header_count = 0; | |
9790 | +	vbi->feature_count = 0; | |
9791 | +	vbi->associative_feature_count = 0; | |
9792 | +	vbi->max_depth = 0; | |
9793 | +	vbi->plugin = NULL; | |
9794 | + | |
9795 | +	/* put all feature nodes back on the volume list */ | |
9796 | +	rc = edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES, | |
9797 | +					&vbi->feature_node_list, | |
9798 | +					volume_node_list, 0, 0); | |
9799 | +	if (rc) | |
9800 | +		return (rc); | |
9801 | + | |
9802 | +	/* load all the feature headers */ | |
9803 | +	if (!volume_complete) { | |
9804 | +		for (node = *volume_node_list; node; node = node->next) { | |
9805 | +			rc = edef_load_feature_header(node); | |
9806 | +			if (rc) | |
9807 | +				return (rc); | |
9808 | +		} | |
9809 | +	} | |
9810 | + | |
9811 | +	/* find the 1st max depth object: record the depth and the plugin */ | |
9812 | +	for (node = *volume_node_list; node; node = node->next) { | |
9813 | +		struct evms_plugin_header *plugin; | |
9814 | +		struct evms_feature_header *fh = node->feature_header; | |
9815 | + | |
9816 | +		/* count the nodes */ | |
9817 | +		vbi->node_count++; | |
9818 | +		/* no feature header found, continue to next node */ | |
9819 | +		if (!fh) | |
9820 | +			continue; | |
9821 | +		/* check the depth */ | |
9822 | +		if (fh->object_depth > vbi->max_depth) { | |
9823 | +			vbi->max_depth = fh->object_depth; | |
9824 | +			rc = edef_get_feature_plugin_header(fh->feature_id, &plugin); | |
9825 | +			if (rc) | |
9826 | +				return (rc); | |
9827 | +			/* check for >1 plugins */ | |
9828 | +			if (vbi->plugin != plugin) { | |
9829 | +				vbi->feature_count++; | |
9830 | +				vbi->plugin = plugin; | |
9831 | +			} | |
9832 | +		} | |
9833 | +		/* check for "associative" feature indicator; no plugin is | |
9834 | +		 * recorded while only depth 0 headers were seen, so test it first */ | |
9835 | +		if (vbi->plugin && | |
9836 | +		    GetPluginType(vbi->plugin->id) == EVMS_ASSOCIATIVE_FEATURE) | |
9837 | +			vbi->associative_feature_count++; | |
9838 | +	} | |
9839 | +	/* build a list of max depth nodes for this feature */ | |
9840 | +	if (vbi->max_depth) { | |
9841 | +		/* the plugin must exist before its id is used */ | |
9842 | +		if (!vbi->plugin) | |
9843 | +			return (-ENODATA); | |
9844 | +		rc = edef_isolate_nodes_by_type | |
9845 | +		    (ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH, volume_node_list, | |
9846 | +		     &vbi->feature_node_list, vbi->plugin->id, vbi->max_depth); | |
9847 | +		if (rc) | |
9848 | +			return (rc); | |
9849 | +		if (!vbi->feature_node_list) | |
9850 | +			return (-ENODATA); | |
9851 | +	} | |
9852 | + | |
9853 | +	return (rc); | |
9854 | +} | |
9855 | + | |
9856 | +/* function: edef_check_feature_conditions | |
9857 | + * | |
9858 | + * Verifies the volume state recorded in 'vbi'. Returns 0 when nothing | |
9859 | + * is wrong, -EVMS_ASSOCIATIVE_FEATURE when associative features were | |
9860 | + * counted while at most one node remains at max depth 1, and | |
9861 | + * -EVMS_VOLUME_FATAL_ERROR (after logging) for each detected error. | |
9862 | + */ | |
9863 | +static int | |
9864 | +edef_check_feature_conditions(evms_volume_build_info_t * vbi) | |
9865 | +{ | |
9866 | +	int rc = 0; | |
9867 | + | |
9868 | +	if (vbi->associative_feature_count) { | |
9869 | +		if (vbi->node_count > 1) { | |
9870 | +			LOG_ERROR("associative ERROR: > 1 nodes(%d) remaining to be processed!\n", | |
9871 | +				  vbi->node_count); | |
9872 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9873 | +		} else if (vbi->max_depth == 1) { | |
9874 | +			rc = -EVMS_ASSOCIATIVE_FEATURE; | |
9875 | +		} else { | |
9876 | +			LOG_ERROR | |
9877 | +			    ("associative ERROR: associative feature found at node depth("PFU64") != 1!\n", | |
9878 | +			     vbi->max_depth); | |
9879 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9880 | +		} | |
9881 | +	} | |
9882 | +	/* a verdict from the associative checks is final */ | |
9883 | +	if (rc) | |
9884 | +		return (rc); | |
9885 | + | |
9886 | +	if (!vbi->max_depth) { | |
9887 | +		if (vbi->node_count > 1) { | |
9888 | +			LOG_ERROR("max depth ERROR: > 1 nodes(%d) remaining to be processed!\n", | |
9889 | +				  vbi->node_count); | |
9890 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9891 | +		} | |
9892 | +	} else if (vbi->max_depth == 1 && vbi->feature_count > 1) { | |
9893 | +		LOG_ERROR | |
9894 | +		    ("max depth 1 ERROR: > 1 features remaining to be processed!\n"); | |
9895 | +		rc = -EVMS_VOLUME_FATAL_ERROR; | |
9896 | +	} | |
9897 | +	return (rc); | |
9898 | +} | |
9899 | + | |
9900 | +/* function: edef_apply_features | |
9901 | + * | |
9902 | + * This routine applies none, one, or more features to an EVMS | |
9903 | + * volume. The node list is evaluated first; then the feature at | |
9904 | + * the max depth is applied and the list is re-evaluated, until the | |
9905 | + * top (depth 1) feature has been applied. A failing feature plugin | |
9906 | + * or re-evaluation is reported as -EVMS_VOLUME_FATAL_ERROR. On any | |
9907 | + * nonzero return the feature nodes are put back on *volume_node_list. | |
9908 | + */ | |
9909 | +static int | |
9910 | +edef_apply_features(struct evms_logical_node **volume_node_list) | |
9911 | +{ | |
9912 | +	int rc, done, top_feature_applying; | |
9913 | +	evms_volume_build_info_t vbi; | |
9914 | + | |
9915 | +	vbi.feature_node_list = NULL; | |
9916 | +	rc = edef_evaluate_volume_node_list(volume_node_list, &vbi, FALSE); | |
9917 | + | |
9918 | +	/* ensure we don't go into the next loop without | |
9919 | +	 * having a target plugin to pass control to. */ | |
9920 | +	if (!rc && !vbi.plugin) | |
9921 | +		rc = -ENODATA; | |
9922 | + | |
9923 | +	/* this loop should ONLY get used when there are features to process */ | |
9924 | +	done = (rc) ? TRUE : FALSE; | |
9925 | +	while (!done) { | |
9926 | +		rc = edef_check_feature_conditions(&vbi); | |
9927 | +		if (rc) | |
9928 | +			break; | |
9929 | +		top_feature_applying = (vbi.max_depth == 1) ? TRUE : FALSE; | |
9930 | +		rc = vbi.plugin->fops->discover(&vbi.feature_node_list); | |
9931 | +		if (rc) { | |
9932 | +			/* the feature plugin failed */ | |
9933 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9934 | +			break; | |
9935 | +		} | |
9936 | +		rc = edef_evaluate_volume_node_list(volume_node_list, &vbi, | |
9937 | +						    top_feature_applying); | |
9938 | +		if (rc) { | |
9939 | +			/* Never loop again after a failed evaluation: vbi | |
9940 | +			 * may be only partly filled in, and the next pass | |
9941 | +			 * would overwrite rc and hide the failure. */ | |
9942 | +			LOG_ERROR("ERROR: error(%d) evaluating volume node list!\n", | |
9943 | +				  rc); | |
9944 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9945 | +			break; | |
9946 | +		} | |
9947 | +		if (top_feature_applying == TRUE) { | |
9948 | +			if (vbi.node_count > 1) { | |
9949 | +				rc = -EVMS_VOLUME_FATAL_ERROR; | |
9950 | +				LOG_ERROR | |
9951 | +				    ("ERROR: detected > 1 node at volume completion!\n"); | |
9952 | +			} | |
9953 | +			done = TRUE; | |
9954 | +		} else if (!vbi.plugin) { | |
9955 | +			rc = -EVMS_VOLUME_FATAL_ERROR; | |
9956 | +			LOG_ERROR | |
9957 | +			    ("ERROR: depth("PFU64"): expected another feature!\n", | |
9958 | +			     vbi.max_depth); | |
9959 | +			done = TRUE; | |
9960 | +		} | |
9961 | +	} | |
9962 | +	if (rc) | |
9963 | +		/* put all feature nodes back on the volume list */ | |
9964 | +		if (edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES, | |
9965 | +					       &vbi.feature_node_list, | |
9966 | +					       volume_node_list, 0, 0)) | |
9967 | +			BUG(); | |
9968 | +	return (rc); | |
9969 | +} | |
9970 | + | |
9971 | +/* Takes 'node' off *node_list and deletes it, logging 'log_text' and | |
9972 | + * 'return_code' as the reason. A node that is no longer on the list is | |
9973 | + * assumed to be deleted by a plugin already. Returns 0, or the error | |
9974 | + * reported by DELETE. | |
9975 | + */ | |
9976 | +static int | |
9977 | +edef_delete_node(struct evms_logical_node **node_list, | |
9978 | +		 struct evms_logical_node *node, int return_code, | |
9979 | +		 char *log_text) | |
9980 | +{ | |
9981 | +	int rc; | |
9982 | + | |
9983 | +	if (evms_cs_remove_logical_node_from_list(node_list, node)) { | |
9984 | +		/* plugin must have cleaned up the node already */ | |
9985 | +		LOG_WARNING | |
9986 | +		    ("%s error(%d): node gone, assumed deleted by plugin.\n", | |
9987 | +		     log_text, return_code); | |
9988 | +		return (0); | |
9989 | +	} | |
9990 | + | |
9991 | +	LOG_ERROR("%s error(%d): deleting volume(%s), node(%s)\n", | |
9992 | +		  log_text, return_code, | |
9993 | +		  node->volume_info->volume_name, node->name); | |
9994 | +	rc = DELETE(node); | |
9995 | +	if (rc) { | |
9996 | +		LOG_ERROR("error(%d) while deleting node(%s)\n", rc, node->name); | |
9997 | +	} | |
9998 | +	return (rc); | |
9999 | +} | |
10000 | + | |
10001 | +/* | |
10002 | + * Moves every EVMS volume node off *discover_list, then builds the | |
10003 | + * volumes one serial number at a time. A finished volume goes back on | |
10004 | + * *discover_list, associative feature nodes are parked on | |
10005 | + * *associative_feature_list, and the nodes of a volume that failed | |
10006 | + * to build are deleted. Returns 0, or a list service error. | |
10007 | + */ | |
10008 | +static int | |
10009 | +edef_process_evms_volumes(struct evms_logical_node **discover_list, | |
10010 | +			  struct evms_logical_node **associative_feature_list) | |
10011 | +{ | |
10012 | +	struct evms_logical_node *evms_volumes_list = NULL; | |
10013 | +	struct evms_logical_node *volume_node_list, *node; | |
10014 | +	u64 volume_sn; | |
10015 | +	int rc; | |
10016 | + | |
10017 | +	/* put all EVMS volumes on their own list */ | |
10018 | +	rc = edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES, discover_list, | |
10019 | +					&evms_volumes_list, 0, 0); | |
10020 | + | |
10021 | +	/* apply features to each EVMS volume, | |
10022 | +	 * one volume at a time on each pass. */ | |
10023 | +	while (evms_volumes_list) { | |
10024 | +		/* put all nodes for one EVMS volume on separate list */ | |
10025 | +		volume_node_list = NULL; | |
10026 | +		volume_sn = evms_volumes_list->volume_info->volume_sn; | |
10027 | +		rc = edef_isolate_nodes_by_type | |
10028 | +		    (ISOLATE_EVMS_VOLUME_SERIAL_NUMBER, &evms_volumes_list, | |
10029 | +		     &volume_node_list, 0, volume_sn); | |
10030 | +		if (rc) | |
10031 | +			break; | |
10032 | + | |
10033 | +		/* go apply all the volume features now */ | |
10034 | +		rc = edef_apply_features(&volume_node_list); | |
10035 | +		if (rc == 0) { | |
10036 | +			/* SUCCESS: remove volume just processed ... */ | |
10037 | +			node = volume_node_list; | |
10038 | +			rc = evms_cs_remove_logical_node_from_list | |
10039 | +			    (&volume_node_list, node); | |
10040 | +			/* ... and put volume on global list */ | |
10041 | +			if (!rc) | |
10042 | +				rc = evms_cs_add_logical_node_to_list | |
10043 | +				    (discover_list, node); | |
10044 | +		} else if (rc == -EVMS_ASSOCIATIVE_FEATURE) { | |
10045 | +			/* put all "associative" features on their own list */ | |
10046 | +			rc = edef_isolate_nodes_by_type | |
10047 | +			    (ISOLATE_ASSOCIATIVE_FEATURES, &volume_node_list, | |
10048 | +			     associative_feature_list, 0, 0); | |
10049 | +		} else { | |
10050 | +			/* FATAL ERROR: delete each node remaining in the list */ | |
10051 | +			if (volume_node_list) { | |
10052 | +				LOG_ERROR | |
10053 | +				    ("encountered fatal error building volume '%s'\n", | |
10054 | +				     volume_node_list->volume_info->volume_name); | |
10055 | +			} | |
10056 | +			while (volume_node_list) { | |
10057 | +				node = volume_node_list; | |
10058 | +				edef_delete_node(&volume_node_list, node, rc, | |
10059 | +						 "EVMS feature"); | |
10060 | +			} | |
10061 | +			rc = 0; | |
10062 | +		} | |
10063 | +		if (rc) | |
10064 | +			break; | |
10065 | +	} | |
10066 | +	return (rc); | |
10067 | +} | |
10068 | + | |
10069 | +/* Moves each associative feature node onto *discover_list and applies | |
10070 | + * its feature; a node whose feature cannot be applied is deleted. | |
10071 | + * Returns 0, or the list service/header load error that stopped it. */ | |
10072 | +static int | |
10073 | +edef_process_associative_volumes(struct evms_logical_node | |
10074 | +				 **associative_feature_list, | |
10075 | +				 struct evms_logical_node **discover_list) | |
10076 | +{ | |
10077 | +	int rc = 0; | |
10078 | +	struct evms_logical_node *node; | |
10079 | +	while ((node = *associative_feature_list) != NULL) { | |
10080 | +		rc = evms_cs_remove_logical_node_from_list | |
10081 | +		    (associative_feature_list, node); | |
10082 | +		if (!rc) | |
10083 | +			rc = evms_cs_add_logical_node_to_list(discover_list, node); | |
10084 | +		if (!rc) | |
10085 | +			rc = edef_load_feature_header(node); | |
10086 | +		if (rc) | |
10087 | +			break; | |
10088 | +		rc = edef_apply_feature(node, discover_list); | |
10089 | +		if (rc) { | |
10090 | +			edef_delete_node(discover_list, node, rc, | |
10091 | +					 "Associative feature"); | |
10092 | +			/* handled by the delete; keep rc independent of node order */ | |
10093 | +			rc = 0; | |
10094 | +		} | |
10095 | +	} | |
10096 | +	return (rc); | |
10097 | +} | |
10098 | + | |
10099 | +/* | |
10100 | + * Deletes every node on *discover_list that still carries a feature | |
10101 | + * header: complete volumes should not have feature headers hanging | |
10102 | + * off them, so such a node belongs to an incomplete volume. | |
10103 | + * Always returns 0. | |
10104 | + */ | |
10105 | +static int | |
10106 | +edef_check_for_incomplete_volumes(struct evms_logical_node **discover_list) | |
10107 | +{ | |
10108 | +	struct evms_logical_node *node = *discover_list, *next_node; | |
10109 | +	int rc = 0; | |
10110 | + | |
10111 | +	while (node) { | |
10112 | +		/* the node may get deleted, so pick up its successor first */ | |
10113 | +		next_node = node->next; | |
10114 | +		if (node->feature_header) | |
10115 | +			edef_delete_node(discover_list, node, rc, | |
10116 | +					 "Unexpected feature header"); | |
10117 | +		node = next_node; | |
10118 | +	} | |
10119 | +	return (rc); | |
10120 | +} | |
10121 | + | |
10122 | +/* | |
10123 | + * Function: evms_discover_evms_features | |
10124 | + * Description: Find features for nodes on the logical partitions list. | |
10125 | + *	The bottom feature search always runs; EVMS volumes, associative | |
10126 | + *	features and the incomplete check each run only while rc is 0. | |
10127 | + */ | |
10128 | +static int | |
10129 | +evms_discover_evms_features(struct evms_logical_node **discover_list) | |
10130 | +{ | |
10131 | +	struct evms_logical_node *associative_feature_list = NULL; | |
10132 | +	int rc; | |
10133 | + | |
10134 | +	LOG_EXTRA("discovering evms volume features...\n"); | |
10135 | + | |
10136 | +	/* find the bottom features */ | |
10137 | +	rc = edef_find_first_features(discover_list); | |
10138 | +#ifdef LOCAL_DEBUG | |
10139 | +	display_discover_list(*discover_list, "after 1st features hdr"); | |
10140 | +#endif | |
10141 | +	/* process EVMS volumes here */ | |
10142 | +	if (rc == 0) { | |
10143 | +		rc = edef_process_evms_volumes(discover_list, | |
10144 | +					       &associative_feature_list); | |
10145 | +	} | |
10146 | +#ifdef LOCAL_DEBUG | |
10147 | +	display_discover_list(*discover_list, "after evms volumes"); | |
10148 | +#endif | |
10149 | +	/* process "associative" features here */ | |
10150 | +	if (rc == 0) { | |
10151 | +		rc = edef_process_associative_volumes | |
10152 | +		    (&associative_feature_list, discover_list); | |
10153 | +	} | |
10154 | +#ifdef LOCAL_DEBUG | |
10155 | +	display_discover_list(*discover_list, "after associatives"); | |
10156 | +#endif | |
10157 | +	/* check for incomplete volumes */ | |
10158 | +	if (rc == 0) | |
10159 | +		rc = edef_check_for_incomplete_volumes(discover_list); | |
10160 | +	return (rc); | |
10161 | +} | |
10162 | + | |
10163 | +/* | |
10164 | + * function: eelv_assign_volume_minor | |
10165 | + * | |
10166 | + * This is a support function for evms_export_logical_volumes. | |
10167 | + * It binds 'node' to slot 'minor' of evms_logical_volumes, fills | |
10168 | + * in the per-minor block device arrays, registers the volume with | |
10169 | + * devfs and increments evms_volumes. | |
10170 | + * | |
10171 | + */ | |
10172 | +static void | |
10173 | +eelv_assign_volume_minor(struct evms_logical_node *node, int minor) | |
10174 | +{ | |
10175 | +	struct evms_logical_volume *lv = &evms_logical_volumes[minor]; | |
10176 | + | |
10177 | +	/* initialize the logical_node entry in the volume array */ | |
10178 | +	lv->node = node; | |
10179 | +	/* the volume keeps its own copy of the node name; | |
10180 | +	 * failing to allocate it ends in BUG() */ | |
10181 | +	lv->name = kmalloc(strlen(EVMS_GET_NODE_NAME(node)) + 1, GFP_KERNEL); | |
10182 | +	if (!lv->name) | |
10183 | +		BUG(); | |
10184 | +	strcpy(lv->name, EVMS_GET_NODE_NAME(node)); | |
10185 | + | |
10186 | +	/* copy flags from top level node into volume structure */ | |
10187 | +	lv->flags = node->flags; | |
10188 | + | |
10189 | +	/* check for read-only volume */ | |
10190 | +	if (lv->flags & EVMS_VOLUME_READ_ONLY) | |
10191 | +		set_device_ro(MKDEV(EVMS_MAJOR, minor), 1); | |
10192 | + | |
10193 | +	/* adjust volume size based on hardsector size */ | |
10194 | +	node->total_vsectors &= | |
10195 | +	    ~((node->hardsector_size >> EVMS_VSECTOR_SIZE_SHIFT) - 1); | |
10196 | + | |
10197 | +	/* initialize the global device arrays; blk_size gets the | |
10198 | +	 * vsector count shifted right by one */ | |
10199 | +	blksize_size[EVMS_MAJOR][minor] = node->block_size; | |
10200 | +	hardsect_size[EVMS_MAJOR][minor] = node->hardsector_size; | |
10201 | +	blk_size[EVMS_MAJOR][minor] = (int) (node->total_vsectors >> 1); | |
10202 | + | |
10203 | +	/* register this volume with devfs */ | |
10204 | +	lv->devfs_handle = devfs_register(evms_dir_devfs_handle, lv->name, | |
10205 | +					  DEVFS_FL_DEFAULT, | |
10206 | +					  EVMS_MAJOR, minor, | |
10207 | +					  S_IFBLK | S_IRUGO | S_IWUGO, | |
10208 | +					  &evms_fops, NULL); | |
10209 | + | |
10210 | +	/* one more volume is exported now */ | |
10211 | +	evms_volumes++; | |
10212 | + | |
10213 | +	LOG_DEFAULT("Exporting EVMS Volume(%u,%u) from \"%s%s\".\n", | |
10214 | +		    EVMS_MAJOR, minor, EVMS_DEV_NODE_PATH, lv->name); | |
10215 | +} | |
10216 | + | |
10217 | +/* | |
10218 | + * function: eelv_check_for_duplicity | |
10219 | + * | |
10220 | + * This is a support function for evms_export_logical_volumes. | |
10221 | + * This routine compares the serial number in the top most node | |
10222 | + * in the volume to the list of currently exported volumes. If | |
10223 | + * this volumes serial number is found in the list then we know | |
10224 | + * this volume is a duplicate and it is then delete. | |
10225 | + * | |
10226 | + */ | |
10227 | +static void | |
10228 | +eelv_check_for_duplicity(struct evms_logical_node **discover_list) | |
10229 | +{ | |
10230 | + struct evms_logical_node *next_node, *node; | |
10231 | + struct evms_logical_volume *lv; | |
10232 | + int i, is_dup; | |
10233 | + | |
10234 | + for (node = *discover_list; node; node = next_node) { | |
10235 | + next_node = node->next; | |
10236 | + | |
10237 | + is_dup = FALSE; | |
10238 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10239 | + lv = &evms_logical_volumes[i]; | |
10240 | + /* only check exported volumes */ | |
10241 | + if (lv->node) { | |
10242 | + char *type_ptr = NULL; | |
10243 | + | |
10244 | + /* check for duplicate pointer */ | |
10245 | + if (node == lv->node) { | |
10246 | + is_dup = TRUE; | |
10247 | + type_ptr = "pointer"; | |
10248 | + /* check for duplicate node */ | |
10249 | + } else if (!strcmp(node->name, lv->node->name)) { | |
10250 | + is_dup = TRUE; | |
10251 | + type_ptr = "node"; | |
10252 | + } | |
10253 | + if (is_dup == TRUE) { | |
10254 | + evms_cs_remove_logical_node_from_list | |
10255 | + (discover_list, node); | |
10256 | + LOG_DETAILS | |
10257 | + ("deleting duplicate %s to EVMS volume(%u,%u,%s)...\n", | |
10258 | + type_ptr, EVMS_MAJOR, i, | |
10259 | + EVMS_GET_NODE_NAME(node)); | |
10260 | + /* forget duplicate */ | |
10261 | + break; | |
10262 | + } | |
10263 | + } | |
10264 | + } | |
10265 | + } | |
10266 | +} | |
10267 | + | |
10268 | +/* | |
10269 | + * function: eelv_reassign_soft_deleted_volume_minors | |
10270 | + * | |
10271 | + * This is a support function for evms_export_logical_volumes. | |
10272 | + * This routine reassigns minor numbers to rediscovered "soft" | |
10273 | + * deleted volumes. | |
10274 | + * | |
10275 | + */ | |
10276 | +static void | |
10277 | +eelv_reassign_soft_deleted_volume_minors(struct evms_logical_node | |
10278 | + **discover_list) | |
10279 | +{ | |
10280 | + struct evms_logical_node *next_node, *node; | |
10281 | + struct evms_logical_volume *lv; | |
10282 | + int i, node_removed; | |
10283 | + | |
10284 | + for (node = *discover_list; node; node = next_node) { | |
10285 | + next_node = node->next; | |
10286 | + | |
10287 | + node_removed = FALSE; | |
10288 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10289 | + lv = &evms_logical_volumes[i]; | |
10290 | + /* only check soft deleted volumes: | |
10291 | + * they have a non-NULL name. | |
10292 | + */ | |
10293 | + if (lv->flags & EVMS_VOLUME_SOFT_DELETED) { | |
10294 | + if (!strcmp(EVMS_GET_NODE_NAME(node), lv->name)) { | |
10295 | + /* reassign requested minor */ | |
10296 | + evms_cs_remove_logical_node_from_list | |
10297 | + (discover_list, node); | |
10298 | + node_removed = TRUE; | |
10299 | + LOG_DEFAULT("Re"); | |
10300 | + /* free the previously used name */ | |
10301 | + kfree(lv->name); | |
10302 | + lv->name = NULL; | |
10303 | + /* clear the EVMS_VOLUME_SOFT_DELETED flag */ | |
10304 | + lv->flags = 0; | |
10305 | + eelv_assign_volume_minor(node, i); | |
10306 | + break; | |
10307 | + } | |
10308 | + } | |
10309 | + } | |
10310 | + } | |
10311 | +} | |
10312 | + | |
10313 | +/* | |
10314 | + * function: eelv_assign_evms_volume_minors | |
10315 | + * | |
10316 | + * This is a support function for evms_export_logical_volumes. | |
10317 | + * This routine assigns minor numbers to new evms volumes. If | |
10318 | + * the specified minor is already in use, the requested minor | |
10319 | + * is set to 0, and will be assigned next available along with | |
10320 | + * any remaining volumes at the end of evms_export_logical_volumes. | |
10321 | + * | |
10322 | + */ | |
10323 | +static void | |
10324 | +eelv_assign_evms_volume_minors(struct evms_logical_node **discover_list) | |
10325 | +{ | |
10326 | + struct evms_logical_node *next_node, *node, *lv_node; | |
10327 | + unsigned int requested_minor, node_removed; | |
10328 | + | |
10329 | + for (node = *discover_list; node; node = next_node) { | |
10330 | + next_node = node->next; | |
10331 | + | |
10332 | + node_removed = FALSE; | |
10333 | + /* only process evms volumes */ | |
10334 | + if (node->flags & EVMS_VOLUME_FLAG) { | |
10335 | + requested_minor = node->volume_info->volume_minor; | |
10336 | + /* is there a requested minor? */ | |
10337 | + if (requested_minor) { | |
10338 | + int lv_flags = 0; | |
10339 | + | |
10340 | + /* check range of requested minor */ | |
10341 | + if (requested_minor >= MAX_EVMS_VOLUMES) | |
10342 | + lv_node = node; | |
10343 | + else { | |
10344 | + struct evms_logical_volume *lv; | |
10345 | + lv = &evms_logical_volumes | |
10346 | + [requested_minor]; | |
10347 | + lv_node = lv->node; | |
10348 | + lv_flags = lv->flags; | |
10349 | + } | |
10350 | + if ((!lv_node) | |
10351 | + && (!(lv_flags & EVMS_VOLUME_SOFT_DELETED))) { | |
10352 | + /* assign requested minor */ | |
10353 | + evms_cs_remove_logical_node_from_list | |
10354 | + (discover_list, node); | |
10355 | + node_removed = TRUE; | |
10356 | + eelv_assign_volume_minor(node, | |
10357 | + requested_minor); | |
10358 | + } else { | |
10359 | + LOG_WARNING | |
10360 | + ("EVMS volume(%s) requesting invalid/in-use minor(%d), assigning next available!\n", | |
10361 | + node->volume_info->volume_name, | |
10362 | + requested_minor); | |
10363 | + /* | |
10364 | + * requested minor is already | |
10365 | + * in use, defer assignment | |
10366 | + * until later. | |
10367 | + */ | |
10368 | + node->volume_info->volume_minor = 0; | |
10369 | + } | |
10370 | + } | |
10371 | + } | |
10372 | + } | |
10373 | +} | |
10374 | + | |
10375 | +/* | |
10376 | + * function: eelv_assign_remaining_evms_volume_minors | |
10377 | + * | |
10378 | + * This is a support function for evms_export_logical_volumes. | |
10379 | + * This routine assigns minor numbers to new evms volumes that | |
10380 | + * have no/conflicting minor assignments. This function will | |
10381 | + * search from high(255) minor values down, for the first available | |
10382 | + * minor. Searching high to low minimizes the possibility of | |
10383 | + * conflicting evms volumes causing "compatibility" minor | |
10384 | + * assignments to shift from expected assignments. | |
10385 | + * | |
10386 | + */ | |
10387 | +static void | |
10388 | +eelv_assign_remaining_evms_volume_minors(struct evms_logical_node | |
10389 | + **discover_list) | |
10390 | +{ | |
10391 | + struct evms_logical_node *next_node, *node; | |
10392 | + int requested_minor, node_removed; | |
10393 | + | |
10394 | + for (node = *discover_list; node; node = next_node) { | |
10395 | + next_node = node->next; | |
10396 | + | |
10397 | + node_removed = FALSE; | |
10398 | + /* only process evms volumes */ | |
10399 | + /* all remaining evms volumes should now | |
10400 | + * have a minor value of 0, meaning they | |
10401 | + * had no minor assignment, or their minor | |
10402 | + * assignment conflicted with an existing | |
10403 | + * minor assignment. | |
10404 | + */ | |
10405 | + if (node->flags & EVMS_VOLUME_FLAG) { | |
10406 | + evms_cs_remove_logical_node_from_list(discover_list, | |
10407 | + node); | |
10408 | + node_removed = TRUE; | |
10409 | + /* find next available minor number */ | |
10410 | + for (requested_minor = 255; | |
10411 | + (evms_logical_volumes[requested_minor].node || | |
10412 | + evms_logical_volumes[requested_minor].name) && | |
10413 | + requested_minor; requested_minor--) ; | |
10414 | + /* check range of assigned minor */ | |
10415 | + if (!requested_minor) { | |
10416 | + LOG_CRITICAL | |
10417 | + ("no more minor numbers available for evms volumes!!!!\n"); | |
10418 | + DELETE(node); | |
10419 | + } else | |
10420 | + /* assign requested minor */ | |
10421 | + eelv_assign_volume_minor(node, requested_minor); | |
10422 | + } | |
10423 | + } | |
10424 | +} | |
10425 | + | |
10426 | +/* | |
10427 | + * function: eelv_assign_remaining_volume_minors | |
10428 | + * | |
10429 | + * This is a support function for evms_export_logical_volumes. | |
10430 | + * This routine assigns minor numbers to all remaining unassigned | |
10431 | + * volumes. Minor numbers are assigned on an availability | |
10432 | + * basis. The first free minor number is used in the assignment. | |
10433 | + * | |
10434 | + */ | |
10435 | +static void | |
10436 | +eelv_assign_remaining_volume_minors(struct evms_logical_node **discover_list) | |
10437 | +{ | |
10438 | + struct evms_logical_node *node; | |
10439 | + int minor; | |
10440 | + | |
10441 | + while (*discover_list) { | |
10442 | + node = *discover_list; | |
10443 | + evms_cs_remove_logical_node_from_list(discover_list, node); | |
10444 | + | |
10445 | + /* find next available minor number */ | |
10446 | + for (minor = 1; | |
10447 | + (evms_logical_volumes[minor].node || | |
10448 | + evms_logical_volumes[minor].name) && | |
10449 | + minor < MAX_EVMS_VOLUMES; minor++) ; | |
10450 | + | |
10451 | + if (minor >= MAX_EVMS_VOLUMES) { | |
10452 | + LOG_CRITICAL | |
10453 | + ("no more minor numbers available for compatibility volumes!!!!\n"); | |
10454 | + DELETE(node); | |
10455 | + } else | |
10456 | + /* assign minor */ | |
10457 | + eelv_assign_volume_minor(node, minor); | |
10458 | + } | |
10459 | +} | |
10460 | + | |
10461 | +/* | |
10462 | + * function: eelv_check_for_unreassign_soft_deleted_volume | |
10463 | + * | |
10464 | + * This is a support function for evms_export_logical_volumes. | |
10465 | + * This routine reports any "soft deleted" volumes that were not | |
10466 | + * found after a rediscovery. | |
10467 | + */ | |
10468 | +static void | |
10469 | +eelv_check_for_unreassign_soft_deleted_volume(void) | |
10470 | +{ | |
10471 | + struct evms_logical_volume *lv; | |
10472 | + int i; | |
10473 | + | |
10474 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10475 | + lv = &evms_logical_volumes[i]; | |
10476 | + /* only check soft deleted volumes: | |
10477 | + * they have a NULL node ptr & | |
10478 | + * they have a non-NULL name. | |
10479 | + */ | |
10480 | + if (lv->flags & EVMS_VOLUME_SOFT_DELETED) { | |
10481 | + if (is_open(i)) | |
10482 | + lv->flags |= EVMS_VOLUME_CORRUPT; | |
10483 | + LOG_ERROR | |
10484 | + ("error: rediscovery failed to find %smounted 'soft deleted' volume(%u,%u,%s)...\n", | |
10485 | + ((lv->flags & EVMS_VOLUME_CORRUPT) ? "" : "un"), | |
10486 | + EVMS_MAJOR, i, lv->name); | |
10487 | + if (lv->flags & EVMS_VOLUME_CORRUPT) { | |
10488 | + LOG_ERROR | |
10489 | + (" flagging volume(%u,%u,%s) as CORRUPT!\n", | |
10490 | + EVMS_MAJOR, i, lv->name); | |
10491 | + } else { | |
10492 | + LOG_ERROR | |
10493 | + (" releasing minor(%d) used by volume(%s)!\n", | |
10494 | + i, lv->name); | |
10495 | + /* clear logical volume structure | |
10496 | + * for this volume so it may be | |
10497 | + * reused. | |
10498 | + */ | |
10499 | + kfree(lv->name); | |
10500 | + lv->name = NULL; | |
10501 | + lv->flags = 0; | |
10502 | + } | |
10503 | + } | |
10504 | + } | |
10505 | +} | |
10506 | + | |
10507 | +static void | |
10508 | +eelv_unquiesce_volumes(void) | |
10509 | +{ | |
10510 | + int i; | |
10511 | + | |
10512 | + /* check each volume array entry */ | |
10513 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10514 | + struct evms_logical_volume *volume; | |
10515 | + | |
10516 | + volume = &evms_logical_volumes[i]; | |
10517 | + /* is this volume "quiesced" ? */ | |
10518 | + if (volume->quiesced) { | |
10519 | + int rc = 1; | |
10520 | + if (volume->node) { | |
10521 | + /* "unquiesce" it */ | |
10522 | + struct inode inode; | |
10523 | + struct evms_quiesce_vol_pkt qv; | |
10524 | + | |
10525 | + qv.command = qv.status = 0; | |
10526 | + qv.do_vfs = 0; | |
10527 | + qv.minor = i; | |
10528 | + rc = evms_quiesce_volume(volume, &inode, NULL, | |
10529 | + &qv); | |
10530 | + } | |
10531 | + /* Wake up any waiters */ | |
10532 | + if (rc) { | |
10533 | + /* clear the flag */ | |
10534 | + volume->quiesced = 0; | |
10535 | + /* wake up the waiters */ | |
10536 | + if (waitqueue_active(&volume->wait_queue)) | |
10537 | + wake_up(&volume->wait_queue); | |
10538 | +#ifdef VFS_PATCH_PRESENT | |
10539 | + /* unquiesce VFS if quiesced */ | |
10540 | + if (volume->vfs_quiesced) { | |
10541 | + /* VFS function call to unlock the filesystem */ | |
10542 | + unlockfs(MKDEV(EVMS_MAJOR, i)); | |
10543 | + volume->vfs_quiesced = FALSE; | |
10544 | + } | |
10545 | +#endif | |
10546 | + } | |
10547 | + } | |
10548 | + } | |
10549 | +} | |
10550 | + | |
10551 | +/* | |
10552 | + * Function: evms_export_logical_volumes | |
10553 | + * | |
10554 | + * This function is called from evms_discover_volumes. It | |
10555 | + * check for duplicate volumes, assigns minor values to evms | |
10556 | + * volumes, and assigns minor values to the remaining volumes. | |
10557 | + * In addition to assigning minor values to each volume this | |
10558 | + * function also completes the final steps necessary to allow | |
10559 | + * the volumes to be using by the operating system. | |
10560 | + */ | |
10561 | +static void | |
10562 | +evms_export_logical_volumes(struct evms_logical_node **discover_list) | |
10563 | +{ | |
10564 | + LOG_EXTRA("exporting EVMS logical volumes...\n"); | |
10565 | + | |
10566 | + eelv_check_for_duplicity(discover_list); | |
10567 | + | |
10568 | + eelv_reassign_soft_deleted_volume_minors(discover_list); | |
10569 | + | |
10570 | + eelv_assign_evms_volume_minors(discover_list); | |
10571 | + | |
10572 | + eelv_assign_remaining_evms_volume_minors(discover_list); | |
10573 | + | |
10574 | + eelv_assign_remaining_volume_minors(discover_list); | |
10575 | + | |
10576 | + eelv_check_for_unreassign_soft_deleted_volume(); | |
10577 | + | |
10578 | + /* "unquiesce" any "quiesced" volumes */ | |
10579 | + eelv_unquiesce_volumes(); | |
10580 | +} | |
10581 | + | |
10582 | +static int | |
10583 | +edv_populate_discover_list(struct evms_list_node *src_list, | |
10584 | + struct evms_logical_node **trg_list, | |
10585 | + struct evms_rediscover_pkt *discover_parms) | |
10586 | +{ | |
10587 | + int rc = 0, i, move_node, use_all_disks = FALSE; | |
10588 | + struct evms_list_node *src_node; | |
10589 | + struct evms_logical_node *disk_node = NULL; | |
10590 | + | |
10591 | + /* if no discover parameters are specified */ | |
10592 | + /* copy ALL the disk nodes into the */ | |
10593 | + /* discovery list. */ | |
10594 | + if ((discover_parms == NULL) || | |
10595 | + (discover_parms->drive_count == REDISCOVER_ALL_DEVICES)) | |
10596 | + use_all_disks = TRUE; | |
10597 | + | |
10598 | + /* copy the disk nodes specified in the */ | |
10599 | + /* discover_parms over to a discover list */ | |
10600 | + src_node = src_list; | |
10601 | + while (src_node) { | |
10602 | + move_node = use_all_disks; | |
10603 | + if (move_node == FALSE) | |
10604 | + /* check the rediscovery array */ | |
10605 | + for (i = 0; i < discover_parms->drive_count; i++) { | |
10606 | + disk_node = | |
10607 | + DEV_HANDLE_TO_NODE(discover_parms-> | |
10608 | + drive_array[i]); | |
10609 | + if (disk_node == src_node->item) { | |
10610 | + move_node = TRUE; | |
10611 | + break; | |
10612 | + } | |
10613 | + } | |
10614 | + /* check to see if we want this node */ | |
10615 | + if (move_node == TRUE) | |
10616 | + evms_cs_add_logical_node_to_list(trg_list, | |
10617 | + (struct | |
10618 | + evms_logical_node *) | |
10619 | + src_node->item); | |
10620 | + /* advance to next struct evms_list_node */ | |
10621 | + src_node = src_node->next; | |
10622 | + } | |
10623 | + return (rc); | |
10624 | +} | |
10625 | + | |
10626 | +static int | |
10627 | +evms_discover_volumes(struct evms_rediscover_pkt *discover_parms) | |
10628 | +{ | |
10629 | + int rc = 0; | |
10630 | + struct evms_logical_node *discover_list = NULL; | |
10631 | + | |
10632 | + evms_discover_logical_disks(&discover_list); | |
10633 | + if (evms_global_device_list) { | |
10634 | + /* move the appropriate disk nodes, based on */ | |
10635 | + /* on the discover parameters, onto the */ | |
10636 | + /* discover list for the partition managers */ | |
10637 | + /* to process */ | |
10638 | + edv_populate_discover_list(evms_global_device_list, | |
10639 | + &discover_list, discover_parms); | |
10640 | + } | |
10641 | + if (discover_list) { | |
10642 | +#ifdef LOCAL_DEBUG | |
10643 | + display_discover_list(discover_list, "after dev mgrs"); | |
10644 | +#endif | |
10645 | + evms_discover_logical_partitions(&discover_list); | |
10646 | + } | |
10647 | + if (discover_list) { | |
10648 | +#ifdef LOCAL_DEBUG | |
10649 | + display_discover_list(discover_list, "after seg mgrs"); | |
10650 | +#endif | |
10651 | + evms_discover_volume_groups(&discover_list); | |
10652 | + } | |
10653 | + if (discover_list) { | |
10654 | +#ifdef LOCAL_DEBUG | |
10655 | + display_discover_list(discover_list, "after reg mgrs"); | |
10656 | +#endif | |
10657 | + evms_discover_evms_features(&discover_list); | |
10658 | + } | |
10659 | + if (discover_list) { | |
10660 | +#ifdef LOCAL_DEBUG | |
10661 | + display_discover_list(discover_list, "after features"); | |
10662 | +#endif | |
10663 | + evms_export_logical_volumes(&discover_list); | |
10664 | + evms_cs_signal_event(EVMS_EVENT_END_OF_DISCOVERY); | |
10665 | + } | |
10666 | + return (rc); | |
10667 | +} | |
10668 | + | |
10669 | +/* function: evms_notify_reboot | |
10670 | + * | |
10671 | + * this function gets called at shutdown time and is used | |
10672 | + * to remove any evms controlled volumes from memory, thus | |
10673 | + * allowing any plugins needing to flush internal caches | |
10674 | + * to do so. | |
10675 | + */ | |
10676 | +int | |
10677 | +evms_notify_reboot(struct notifier_block *this, unsigned long code, void *x) | |
10678 | +{ | |
10679 | + int i; | |
10680 | + struct evms_logical_volume *volume; | |
10681 | + | |
10682 | + switch (code) { | |
10683 | + case SYS_DOWN: | |
10684 | + case SYS_HALT: | |
10685 | + case SYS_POWER_OFF: | |
10686 | + LOG_DEFAULT("stopping all evms controlled volumes.\n"); | |
10687 | + | |
10688 | + /* quiesce all volumes */ | |
10689 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10690 | + struct evms_quiesce_vol_pkt qv; | |
10691 | + struct inode inode; | |
10692 | + | |
10693 | + volume = &evms_logical_volumes[i]; | |
10694 | + if (!volume->node) | |
10695 | + continue; | |
10696 | + qv.command = 1; // quiesce | |
10697 | + qv.minor = i; // | |
10698 | + qv.status = 0; // reset status | |
10699 | + qv.do_vfs = 0; | |
10700 | + evms_quiesce_volume(volume, &inode, NULL, &qv); | |
10701 | + } | |
10702 | + /* delete all volumes | |
10703 | + * | |
10704 | + * to ensure this work under the | |
10705 | + * most circumstances, a "soft" | |
10706 | + * delete will be done. this will | |
10707 | + * handle the strange case of a | |
10708 | + * volume still being mounted. | |
10709 | + */ | |
10710 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10711 | + struct evms_delete_vol_pkt dv; | |
10712 | + | |
10713 | + volume = &evms_logical_volumes[i]; | |
10714 | + if (!volume->node) | |
10715 | + continue; | |
10716 | + /* only delete quiesced volumes */ | |
10717 | + if (!volume->quiesced) | |
10718 | + continue; | |
10719 | + /* delete the volume from memory. | |
10720 | + * do a 'soft' delete if volume | |
10721 | + * is mounted, and 'hard' delete | |
10722 | + * if it is not. | |
10723 | + */ | |
10724 | + dv.command = is_open(i); | |
10725 | + dv.minor = i; | |
10726 | + dv.status = 0; | |
10727 | + evms_delete_volume(volume, &dv); | |
10728 | + } | |
10729 | + } | |
10730 | + return NOTIFY_DONE; | |
10731 | +} | |
10732 | + | |
10733 | +static struct notifier_block evms_notifier = { | |
10734 | + .notifier_call = evms_notify_reboot, | |
10735 | + .next = NULL, | |
10736 | + .priority = INT_MAX, /* before any real devices */ | |
10737 | +}; | |
10738 | + | |
10739 | +/* | |
10740 | + * Function: find_root_fs_dev | |
10741 | + * If "root=/dev/evms/???" was specified on the kernel command line, and devfs | |
10742 | + * is not enabled, we need to determine the appropriate minor number for the | |
10743 | + * specified volume for the root fs. | |
10744 | + */ | |
10745 | +static void | |
10746 | +find_root_fs_dev(void) | |
10747 | +{ | |
10748 | +#ifndef MODULE | |
10749 | + char root_name[64] = { 0 }; | |
10750 | + char *name; | |
10751 | + int i; | |
10752 | + | |
10753 | +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,18) | |
10754 | + strncpy(root_name, root_device_name, 63); | |
10755 | +#else | |
10756 | + get_root_device_name(root_name); | |
10757 | +#endif | |
10758 | + | |
10759 | + if (!strncmp(root_name, EVMS_DIR_NAME "/", strlen(EVMS_DIR_NAME) + 1)) { | |
10760 | + name = &root_name[strlen(EVMS_DIR_NAME) + 1]; | |
10761 | + | |
10762 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10763 | + if (evms_logical_volumes[i].name && | |
10764 | + !strncmp(name, evms_logical_volumes[i].name, | |
10765 | + strlen(evms_logical_volumes[i].name))) { | |
10766 | + ROOT_DEV = MKDEV(EVMS_MAJOR, i); | |
10767 | + return; | |
10768 | + } | |
10769 | + } | |
10770 | + } | |
10771 | +#endif | |
10772 | +} | |
10773 | + | |
10774 | +/* | |
10775 | + * Function: bh_cache_ctor | |
10776 | + * this function initializes the b_wait field in the buffer heads | |
10777 | + * in our private buffer head pool. | |
10778 | + */ | |
10779 | +static void | |
10780 | +io_notify_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags) | |
10781 | +{ | |
10782 | + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == | |
10783 | + SLAB_CTOR_CONSTRUCTOR) { | |
10784 | + io_notify_t *io_notify = (io_notify_t *) foo; | |
10785 | + memset(io_notify, 0, sizeof (*io_notify)); | |
10786 | + } | |
10787 | +} | |
10788 | + | |
10789 | +/* | |
10790 | + * Function: bh_cache_ctor | |
10791 | + * this function initializes the b_wait field in the buffer heads | |
10792 | + * in our private buffer head pool. | |
10793 | + */ | |
10794 | +static void | |
10795 | +bh_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags) | |
10796 | +{ | |
10797 | + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == | |
10798 | + SLAB_CTOR_CONSTRUCTOR) { | |
10799 | + struct buffer_head *bh = (struct buffer_head *) foo; | |
10800 | + memset(bh, 0, sizeof (*bh)); | |
10801 | + init_waitqueue_head(&bh->b_wait); | |
10802 | + } | |
10803 | +} | |
10804 | + | |
10805 | +/* | |
10806 | + * Function: evms_init_module | |
10807 | + * This function runs once at system initialization. | |
10808 | + */ | |
10809 | +static int __init | |
10810 | +evms_init_module(void) | |
10811 | +{ | |
10812 | + int rc = 0, i; | |
10813 | + int *evms_blocksizes; | |
10814 | + | |
10815 | + LOG_DEFAULT("EVMS v%d.%d.%d initializing .... info level(%d).\n", | |
10816 | + EVMS_MAJOR_VERSION, | |
10817 | + EVMS_MINOR_VERSION, | |
10818 | + EVMS_PATCHLEVEL_VERSION, evms_info_level); | |
10819 | + | |
10820 | + /* initialize memory management counters */ | |
10821 | + evms_allocs = (atomic_t) ATOMIC_INIT(0); | |
10822 | + evms_logical_nodes = (atomic_t) ATOMIC_INIT(0); | |
10823 | + | |
10824 | + /* initialize the io_notify_entry pool */ | |
10825 | + if (!rc) | |
10826 | + evms_io_notify_pool = evms_cs_create_pool(sizeof (io_notify_t), | |
10827 | + "EVMS IO Notify", | |
10828 | + io_notify_cache_ctor, | |
10829 | + NULL); | |
10830 | + | |
10831 | + /* initialize the "public" buffer_head pool */ | |
10832 | + if (!rc) | |
10833 | + evms_bh_pool = evms_cs_create_pool(sizeof (struct buffer_head), | |
10834 | + "EVMS BH", | |
10835 | + bh_cache_ctor, NULL); | |
10836 | + | |
10837 | + /* allocate the logical volume array */ | |
10838 | + if (!rc) | |
10839 | + evms_logical_volumes = | |
10840 | + kmalloc(sizeof (struct evms_logical_volume) * | |
10841 | + MAX_EVMS_VOLUMES, GFP_KERNEL); | |
10842 | + if (!evms_logical_volumes) { | |
10843 | + rc = -ENOMEM; | |
10844 | + } | |
10845 | + | |
10846 | + /* initialize the logical volume array entries */ | |
10847 | + if (!rc) { | |
10848 | + memset(evms_logical_volumes, 0, | |
10849 | + sizeof (struct evms_logical_volume) * MAX_EVMS_VOLUMES); | |
10850 | + for (i = 1; i < MAX_EVMS_VOLUMES; i++) { | |
10851 | + struct evms_logical_volume *volume; | |
10852 | + | |
10853 | + volume = &evms_logical_volumes[i]; | |
10854 | + init_waitqueue_head(&volume->wait_queue); | |
10855 | + volume->requests_in_progress = | |
10856 | + (atomic_t) ATOMIC_INIT(0); | |
10857 | +#ifdef CONFIG_SMP | |
10858 | + blk_init_queue(&volume->request_queue, | |
10859 | + evms_do_request_fn); | |
10860 | + blk_queue_make_request(&volume->request_queue, | |
10861 | + evms_make_request_fn); | |
10862 | +#endif | |
10863 | + } | |
10864 | + } | |
10865 | + | |
10866 | + /* allocate EVMS' blk_size array */ | |
10867 | + if (!rc) { | |
10868 | + evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES * | |
10869 | + sizeof (int), GFP_KERNEL); | |
10870 | + if (!evms_blocksizes) { | |
10871 | + rc = -ENOMEM; | |
10872 | + LOG_CRITICAL | |
10873 | + ("can't allocate memory for EVMS blk_size\n"); | |
10874 | + } else { | |
10875 | + memset(evms_blocksizes, 0, | |
10876 | + MAX_EVMS_VOLUMES * sizeof (int)); | |
10877 | + blk_size[EVMS_MAJOR] = evms_blocksizes; | |
10878 | + } | |
10879 | + } | |
10880 | + | |
10881 | + /* allocate EVMS' blksize_size array */ | |
10882 | + if (!rc) { | |
10883 | + evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES * | |
10884 | + sizeof (int), GFP_KERNEL); | |
10885 | + if (!evms_blocksizes) { | |
10886 | + rc = -ENOMEM; | |
10887 | + LOG_CRITICAL | |
10888 | + ("can't allocate memory for EVMS blksize_size\n"); | |
10889 | + } else { | |
10890 | + memset(evms_blocksizes, 0, | |
10891 | + MAX_EVMS_VOLUMES * sizeof (int)); | |
10892 | + blksize_size[EVMS_MAJOR] = evms_blocksizes; | |
10893 | + } | |
10894 | + } | |
10895 | + | |
10896 | + /* allocate EVMS' hardsect_size array */ | |
10897 | + if (!rc) { | |
10898 | + evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES * | |
10899 | + sizeof (int), GFP_KERNEL); | |
10900 | + if (!evms_blocksizes) { | |
10901 | + rc = -ENOMEM; | |
10902 | + LOG_CRITICAL | |
10903 | + ("can't allocate memory for EVMS hardsect_size\n"); | |
10904 | + } else { | |
10905 | + memset(evms_blocksizes, 0, | |
10906 | + MAX_EVMS_VOLUMES * sizeof (int)); | |
10907 | + hardsect_size[EVMS_MAJOR] = evms_blocksizes; | |
10908 | + } | |
10909 | + } | |
10910 | + | |
10911 | + /* Register the block device */ | |
10912 | + if (!rc) { | |
10913 | + rc = devfs_register_blkdev(EVMS_MAJOR, EVMS_DIR_NAME, | |
10914 | + &evms_fops); | |
10915 | + if (rc) { | |
10916 | + LOG_CRITICAL | |
10917 | + ("error calling devfs_register_blkdev() err=%u\n", | |
10918 | + rc); | |
10919 | + rc = -EINVAL; | |
10920 | + } | |
10921 | + } | |
10922 | + | |
10923 | + /* Register with devfs */ | |
10924 | + if (!rc) { | |
10925 | + evms_dir_devfs_handle = devfs_mk_dir(NULL, EVMS_DIR_NAME, NULL); | |
10926 | + // A NULL return cannot be fatal. | |
10927 | + // Devfs just might not be running | |
10928 | + if (!evms_dir_devfs_handle) { | |
10929 | + LOG_EXTRA | |
10930 | + ("NULL return from devfs_mk_dir() for \"%s\"\n", | |
10931 | + EVMS_DIR_NAME); | |
10932 | + LOG_EXTRA("Is devfs enabled?\n"); | |
10933 | + } else { | |
10934 | + evms_blk_devfs_handle = | |
10935 | + devfs_register(evms_dir_devfs_handle, EVMS_DEV_NAME, | |
10936 | + DEVFS_FL_DEFAULT, EVMS_MAJOR, 0, | |
10937 | + S_IFBLK | S_IRUGO | S_IWUGO, | |
10938 | + &evms_fops, NULL); | |
10939 | + if (!evms_blk_devfs_handle) { | |
10940 | + LOG_DETAILS | |
10941 | + ("NULL return from devfs_register() for \"%s\"\n", | |
10942 | + EVMS_DEV_NAME); | |
10943 | + } | |
10944 | + } | |
10945 | + } | |
10946 | + | |
10947 | + if (!rc) { | |
10948 | + read_ahead[EVMS_MAJOR] = 4096; | |
10949 | +#ifdef CONFIG_SMP | |
10950 | + blk_dev[EVMS_MAJOR].queue = evms_find_queue; | |
10951 | +#else | |
10952 | + blk_init_queue(BLK_DEFAULT_QUEUE(EVMS_MAJOR), | |
10953 | + evms_do_request_fn); | |
10954 | + blk_queue_make_request(BLK_DEFAULT_QUEUE(EVMS_MAJOR), | |
10955 | + evms_make_request_fn); | |
10956 | +#endif | |
10957 | +#ifdef CONFIG_PROC_FS | |
10958 | + evms_cs_get_evms_proc_dir(); | |
10959 | + if (evms_proc_dir) { | |
10960 | + create_proc_read_entry("info", 0, evms_proc_dir, | |
10961 | + evms_info_read_proc, NULL); | |
10962 | + create_proc_read_entry("plugins", 0, evms_proc_dir, | |
10963 | + evms_plugins_read_proc, NULL); | |
10964 | + create_proc_read_entry("volumes", 0, evms_proc_dir, | |
10965 | + evms_volumes_read_proc, NULL); | |
10966 | + } | |
10967 | + evms_table_header = register_sysctl_table(dev_dir_table, 1); | |
10968 | +#endif | |
10969 | + /* Register for reboot notification */ | |
10970 | + register_reboot_notifier(&evms_notifier); | |
10971 | + | |
10972 | +#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) | |
10973 | + /* Register evms 32bit ioctl handlers */ | |
10974 | + lock_kernel(); | |
10975 | + register_ioctl32_conversion(EVMS_GET_INFO_LEVEL,NULL); | |
10976 | + register_ioctl32_conversion(EVMS_SET_INFO_LEVEL,NULL); | |
10977 | + register_ioctl32_conversion(EVMS_REDISCOVER_VOLUMES_32, | |
10978 | + evms_rediscover); | |
10979 | + register_ioctl32_conversion(EVMS_DELETE_VOLUME,NULL); | |
10980 | + register_ioctl32_conversion(EVMS_PLUGIN_IOCTL_32, | |
10981 | + evms_plugin_ioctl); | |
10982 | + register_ioctl32_conversion(EVMS_PROCESS_NOTIFY_EVENT,NULL); | |
10983 | + register_ioctl32_conversion(EVMS_GET_LOGICAL_DISK,NULL); | |
10984 | + register_ioctl32_conversion(EVMS_GET_LOGICAL_DISK_INFO,NULL); | |
10985 | + register_ioctl32_conversion(EVMS_SECTOR_IO_32, evms_sector_io); | |
10986 | + register_ioctl32_conversion(EVMS_GET_MINOR,NULL); | |
10987 | + register_ioctl32_conversion(EVMS_GET_VOLUME_DATA,NULL); | |
10988 | + register_ioctl32_conversion(EVMS_GET_PLUGIN,NULL); | |
10989 | + register_ioctl32_conversion(EVMS_COMPUTE_CSUM_32, | |
10990 | + evms_compute_csum); | |
10991 | + register_ioctl32_conversion(EVMS_GET_BMAP,NULL); | |
10992 | + register_ioctl32_conversion(EVMS_GET_IOCTL_VERSION,NULL); | |
10993 | + register_ioctl32_conversion(EVMS_GET_VERSION,NULL); | |
10994 | + register_ioctl32_conversion(EVMS_UPDATE_DEVICE_INFO,NULL); | |
10995 | + register_ioctl32_conversion(EVMS_CHECK_MOUNT_STATUS,NULL); | |
10996 | + register_ioctl32_conversion(EVMS_GET_VOL_STRIPE_INFO,NULL); | |
10997 | + unlock_kernel(); | |
10998 | +#endif | |
10999 | + | |
11000 | + } | |
11001 | + | |
11002 | + return rc; | |
11003 | +} | |
11004 | + | |
11005 | +/* | |
11006 | + * Function: evms_exit_module | |
11007 | + * Unload hook of the EVMS core module (see module_exit): undoes the registrations and frees the arrays and pools named below. | |
11008 | + */ | |
11009 | +static void __exit | |
11010 | +evms_exit_module(void) | |
11011 | +{ | |
11012 | + LOG_DEFAULT("EVMS v%d.%d.%d unloading ....\n", | |
11013 | + EVMS_MAJOR_VERSION, | |
11014 | + EVMS_MINOR_VERSION, EVMS_PATCHLEVEL_VERSION); | |
11015 | + | |
11016 | +#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64) | |
11017 | + /* Unregister the EVMS 32-bit ioctl conversion handlers (PPC64/SPARC64 builds only), under the big kernel lock */ | |
11018 | + lock_kernel(); | |
11019 | + unregister_ioctl32_conversion(EVMS_GET_INFO_LEVEL); | |
11020 | + unregister_ioctl32_conversion(EVMS_SET_INFO_LEVEL); | |
11021 | + unregister_ioctl32_conversion(EVMS_REDISCOVER_VOLUMES_32); | |
11022 | + unregister_ioctl32_conversion(EVMS_DELETE_VOLUME); | |
11023 | + unregister_ioctl32_conversion(EVMS_PLUGIN_IOCTL_32); | |
11024 | + unregister_ioctl32_conversion(EVMS_PROCESS_NOTIFY_EVENT); | |
11025 | + unregister_ioctl32_conversion(EVMS_GET_LOGICAL_DISK); | |
11026 | + unregister_ioctl32_conversion(EVMS_GET_LOGICAL_DISK_INFO); | |
11027 | + unregister_ioctl32_conversion(EVMS_SECTOR_IO_32); | |
11028 | + unregister_ioctl32_conversion(EVMS_GET_MINOR); | |
11029 | + unregister_ioctl32_conversion(EVMS_GET_VOLUME_DATA); | |
11030 | + unregister_ioctl32_conversion(EVMS_GET_PLUGIN); | |
11031 | + unregister_ioctl32_conversion(EVMS_COMPUTE_CSUM_32); | |
11032 | + unregister_ioctl32_conversion(EVMS_GET_BMAP); | |
11033 | + unregister_ioctl32_conversion(EVMS_GET_IOCTL_VERSION); | |
11034 | + unregister_ioctl32_conversion(EVMS_GET_VERSION); | |
11035 | + unregister_ioctl32_conversion(EVMS_UPDATE_DEVICE_INFO); | |
11036 | + unregister_ioctl32_conversion(EVMS_CHECK_MOUNT_STATUS); | |
11037 | + unregister_ioctl32_conversion(EVMS_GET_VOL_STRIPE_INFO); | |
11038 | + unlock_kernel(); | |
11039 | +#endif | |
11040 | + | |
11041 | + /* unregister the EVMS devfs handle | |
11042 | + */ | |
11043 | + devfs_unregister(evms_dir_devfs_handle); | |
11044 | + /* clean up the queue for the block device (looked up via EVMS_MAJOR, minor 0) | |
11045 | + */ | |
11046 | + blk_cleanup_queue(blk_get_queue(MKDEV(EVMS_MAJOR, 0))); | |
11047 | + /* unregister the EVMS block device major | |
11048 | + */ | |
11049 | + devfs_unregister_blkdev(EVMS_MAJOR, EVMS_DIR_NAME); | |
11050 | + /* free the per-major device arrays and clear their global table slots | |
11051 | + */ | |
11052 | + kfree(blk_size[EVMS_MAJOR]); | |
11053 | + blk_size[EVMS_MAJOR] = NULL; | |
11054 | + kfree(blksize_size[EVMS_MAJOR]); | |
11055 | + blksize_size[EVMS_MAJOR] = NULL; | |
11056 | + kfree(hardsect_size[EVMS_MAJOR]); | |
11057 | + hardsect_size[EVMS_MAJOR] = NULL; | |
11058 | + read_ahead[EVMS_MAJOR] = 0; /* reset the read-ahead entry for this major */ | |
11059 | + /* free the logical volumes array | |
11060 | + */ | |
11061 | + kfree(evms_logical_volumes); | |
11062 | + /* destroy the buffer head pool | |
11063 | + */ | |
11064 | + evms_cs_destroy_pool(evms_bh_pool); | |
11065 | + /* destroy the io notify pool | |
11066 | + */ | |
11067 | + evms_cs_destroy_pool(evms_io_notify_pool); | |
11068 | +#ifdef CONFIG_PROC_FS | |
11069 | + if (evms_proc_dir) { /* skipped when no proc directory handle is set */ | |
11070 | + remove_proc_entry("volumes", evms_proc_dir); | |
11071 | + remove_proc_entry("plugins", evms_proc_dir); | |
11072 | + remove_proc_entry("info", evms_proc_dir); | |
11073 | + remove_proc_entry("evms", NULL); /* NOTE(review): presumably the proc directory itself; confirm against the init code */ | |
11074 | + } | |
11075 | + unregister_sysctl_table(evms_table_header); /* called whether or not evms_proc_dir is set */ | |
11076 | +#endif | |
11077 | +} | |
11078 | + | |
11079 | +/* | |
11080 | + * Function: evms_init_discover | |
11081 | + * Boot-time initcall: runs the first volume discovery pass and, when the | |
11082 | + * root device carries the EVMS major, looks up the root fs device. Returns 0. | |
11083 | + */ | |
11084 | +int __init | |
11085 | +evms_init_discover(void) | |
11086 | +{ | |
11087 | + /* initial discovery pass */ | |
11088 | + evms_discover_volumes(NULL); | |
11089 | + | |
11090 | + /* root fs is not on an EVMS volume: nothing else to do */ | |
11091 | + if (MAJOR(ROOT_DEV) != EVMS_MAJOR) { | |
11092 | + return 0; | |
11093 | + } | |
11094 | + find_root_fs_dev(); | |
11095 | + return 0; | |
11096 | +} | |
11097 | + | |
11098 | +/* | |
11099 | + * Exported stub reserved for cluster enablement; accepts and ignores both ids. | |
11100 | + */ | |
11101 | +void | |
11102 | +evms_cluster_init(int nodeid, int clusterid) | |
11103 | +{ | |
11104 | + (void) nodeid; /* intentionally unused */ | |
11105 | + (void) clusterid; /* intentionally unused */ | |
11106 | +} | |
11107 | + | |
11108 | +EXPORT_SYMBOL(evms_cluster_init); | |
11109 | + | |
11110 | +/* | |
11111 | + * Exported stub reserved for cluster enablement; always returns -1. | |
11112 | + */ | |
11113 | +int | |
11114 | +evms_cluster_shutdown(void) | |
11115 | +{ | |
11116 | + const int stub_result = -1; /* no cluster support implemented here */ | |
11117 | + return stub_result; | |
11118 | +} | |
11119 | + | |
11120 | +EXPORT_SYMBOL(evms_cluster_shutdown); | |
11121 | + | |
11122 | +static int __init | |
11123 | +evms_boot_info_level(char *str) | |
11124 | +{ | |
11125 | + /* handler for the "evms_info_level=" boot option; a parsed value of 0 leaves evms_info_level untouched */ | |
11126 | + const int requested_level = (int) simple_strtoul(str, NULL, 10); | |
11127 | + if (requested_level != 0) | |
11128 | + evms_info_level = requested_level; | |
11129 | + return 1; | |
11130 | +} | |
11131 | + | |
11132 | +__setup("evms_info_level=", evms_boot_info_level); /* boot option "evms_info_level=<n>" */ | |
11133 | +module_init(evms_init_module); /* module load entry point */ | |
11134 | +module_exit(evms_exit_module); /* module unload entry point */ | |
11135 | +__initcall(evms_init_discover); /* initial volume discovery when EVMS is built into the kernel */ | |
11136 | +#ifdef MODULE_LICENSE | |
11137 | +MODULE_LICENSE("GPL"); | |
11138 | +#endif | |
11139 | + | |
11140 | +/**********************************************************/ | |
11141 | +/* END -- INIT/DISCOVERY support functions */ | |
11142 | +/**********************************************************/ | |
11143 | diff -Naur linux-2002-09-30/drivers/evms/evms_bbr.c evms-2002-09-30/drivers/evms/evms_bbr.c | |
11144 | --- linux-2002-09-30/drivers/evms/evms_bbr.c Wed Dec 31 18:00:00 1969 | |
11145 | +++ evms-2002-09-30/drivers/evms/evms_bbr.c Wed Sep 25 15:04:22 2002 | |
11146 | @@ -0,0 +1,1817 @@ | |
11147 | +/* -*- linux-c -*- */ | |
11148 | +/* | |
11149 | + * Copyright (c) International Business Machines Corp., 2000 | |
11150 | + * | |
11151 | + * This program is free software; you can redistribute it and/or modify | |
11152 | + * it under the terms of the GNU General Public License as published by | |
11153 | + * the Free Software Foundation; either version 2 of the License, or | |
11154 | + * (at your option) any later version. | |
11155 | + * | |
11156 | + * This program is distributed in the hope that it will be useful, | |
11157 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11158 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
11159 | + * the GNU General Public License for more details. | |
11160 | + * | |
11161 | + * You should have received a copy of the GNU General Public License | |
11162 | + * along with this program; if not, write to the Free Software | |
11163 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
11164 | + */ | |
11165 | +/* linux/drivers/evms/evms_bbr.c | |
11166 | + * | |
11167 | + * EVMS - Bad Block Relocation (BBR) Feature Plugin | |
11168 | + * | |
11169 | + * The BBR feature is designed to remap I/O write failures to another safe location | |
11170 | + * on disk. Note that most disk drives have BBR built into them; this means | |
11171 | + * that our software BBR will only be activated once all hardware BBR | |
11172 | + * replacement sectors have been used. | |
11173 | + */ | |
11174 | + | |
11175 | +#define LOG_PREFIX "bbr: " | |
11176 | + | |
11177 | +#include <linux/config.h> | |
11178 | +#include <linux/kernel.h> | |
11179 | +#include <linux/module.h> | |
11180 | +#include <linux/mempool.h> | |
11181 | +#include <asm/uaccess.h> | |
11182 | + | |
11183 | +#include <linux/evms/evms.h> | |
11184 | +#include <linux/evms/evms_bbr_k.h> | |
11185 | + | |
11186 | +/* API prototypes: the plugin entry points installed in function_table below. */ | |
11187 | +static int bbr_discover(struct evms_logical_node ** discover_list); | |
11188 | +static int bbr_delete(struct evms_logical_node * node); | |
11189 | +static void bbr_read(struct evms_logical_node * node, struct buffer_head * bh); | |
11190 | +static void bbr_write(struct evms_logical_node * node, struct buffer_head * bh); | |
11191 | +static int bbr_ioctl(struct evms_logical_node * bbr_node, | |
11192 | + struct inode * inode, | |
11193 | + struct file * file, | |
11194 | + unsigned int cmd, | |
11195 | + unsigned long arg); | |
11196 | +static int bbr_direct_ioctl(struct inode * inode, | |
11197 | + struct file * file, | |
11198 | + unsigned int cmd, | |
11199 | + unsigned long arg); | |
11200 | +static int bbr_init_io(struct evms_logical_node * bbr_node, | |
11201 | + int io_flag, | |
11202 | + u64 startLSN, | |
11203 | + u64 nr_sects, | |
11204 | + void * bufptr); | |
11205 | + | |
11206 | +/* Other function prototypes: internal helpers defined later in this file. */ | |
11207 | +static int bbr_create_pools(void); | |
11208 | +static void bbr_destroy_pools(void); | |
11209 | +static u32 bbr_table_to_remap_list(struct bbr_private * bbr_id); | |
11210 | +static void bbr_io_handler(void * void_data); | |
11211 | +static void bbr_free_private(struct bbr_private * bbr_id); | |
11212 | +static inline void bbr_list_add(struct bbr_private * bbr_id); | |
11213 | + | |
11214 | +/* List of all BBR nodes. */ | |
11215 | +static struct bbr_private * bbr_instances = NULL; | |
11216 | + | |
11217 | +/* Data pertaining to the I/O thread. The thread is registered lazily by load_feature_data(). */ | |
11218 | +static struct evms_thread * bbr_io_thread = NULL; | |
11219 | +static spinlock_t bbr_io_list_lock = SPIN_LOCK_UNLOCKED; | |
11220 | +static struct list_head bbr_io_list = LIST_HEAD_INIT(bbr_io_list); | |
11221 | + | |
11222 | +/* Global pools for bbr_io_buf's and bbr_remap's; load_feature_data() calls bbr_create_pools() while either pool is NULL. NOTE(review): these four are not static, unlike the globals above - confirm nothing outside this file needs them. */ | |
11223 | +kmem_cache_t * bbr_io_buf_slab; | |
11224 | +mempool_t * bbr_io_buf_pool; | |
11225 | +kmem_cache_t * bbr_remap_slab; | |
11226 | +mempool_t * bbr_remap_pool; | |
11227 | + | |
11228 | +/* Plugin function table: maps each evms_plugin_fops slot to the BBR handler of the same name. plugin_header.fops points at it. */ | |
11229 | +static struct evms_plugin_fops function_table = { | |
11230 | + .discover = bbr_discover, | |
11231 | + .delete = bbr_delete, | |
11232 | + .read = bbr_read, | |
11233 | + .write = bbr_write, | |
11234 | + .init_io = bbr_init_io, | |
11235 | + .ioctl = bbr_ioctl, | |
11236 | + .direct_ioctl = bbr_direct_ioctl | |
11237 | +}; | |
11238 | + | |
11239 | +static struct evms_plugin_header plugin_header = { /* identity, version and entry points of the BBR feature plugin */ | |
11240 | + .id = SetPluginID(IBM_OEM_ID, | |
11241 | + EVMS_FEATURE, | |
11242 | + EVMS_BBR_FEATURE_ID), | |
11243 | + .version = { /* version of this plugin */ | |
11244 | + .major = EVMS_BBR_VERSION_MAJOR, | |
11245 | + .minor = EVMS_BBR_VERSION_MINOR, | |
11246 | + .patchlevel = EVMS_BBR_VERSION_PATCHLEVEL | |
11247 | + }, | |
11248 | + .required_services_version = { /* EVMS common services level this plugin requires */ | |
11249 | + .major = EVMS_BBR_COMMON_SERVICES_MAJOR, | |
11250 | + .minor = EVMS_BBR_COMMON_SERVICES_MINOR, | |
11251 | + .patchlevel = EVMS_BBR_COMMON_SERVICES_PATCHLEVEL | |
11252 | + }, | |
11253 | + .fops = &function_table | |
11254 | +}; | |
11255 | + | |
11256 | +/** | |
11257 | + * le_meta_data_to_cpu | |
11258 | + * @md: BBR metadata sector holding little-endian (on-disk) field values. | |
11259 | + * Rewrites each field listed below, in place, from on-disk (LE) | |
11260 | + * byte order to the native cpu byte order. | |
11261 | + */ | |
11262 | +void le_meta_data_to_cpu(struct evms_bbr_metadata * md) | |
11263 | +{ | |
11264 | + md->nr_replacement_blks = le64_to_cpup(&md->nr_replacement_blks); | |
11265 | + md->start_replacement_sect = le64_to_cpup(&md->start_replacement_sect); | |
11266 | + md->nr_sects_bbr_table = le64_to_cpup(&md->nr_sects_bbr_table); | |
11267 | + md->start_sect_bbr_table = le64_to_cpup(&md->start_sect_bbr_table); | |
11268 | + md->sequence_number = le64_to_cpup(&md->sequence_number); | |
11269 | + md->flags = le32_to_cpup(&md->flags); | |
11270 | + md->block_size = le32_to_cpup(&md->block_size); | |
11271 | + md->crc = le32_to_cpup(&md->crc); | |
11272 | + md->signature = le32_to_cpup(&md->signature); | |
11273 | +} | |
11274 | + | |
11275 | +/** | |
11276 | + * le_bbr_table_sector_to_cpu | |
11277 | + * @p: one BBR table sector holding little-endian (on-disk) values. | |
11278 | + * Rewrites the sector header words and all EVMS_BBR_ENTRIES_PER_SECT | |
11279 | + * remap entries, in place, to the native cpu byte order. | |
11280 | + */ | |
11281 | +void le_bbr_table_sector_to_cpu(struct evms_bbr_table * p) | |
11282 | +{ | |
11283 | + int idx = 0; | |
11284 | + /* remap entries first, then the sector header words */ | |
11285 | + while ( idx < EVMS_BBR_ENTRIES_PER_SECT ) { | |
11286 | + p->entries[idx].bad_sect = le64_to_cpup(&p->entries[idx].bad_sect); | |
11287 | + p->entries[idx].replacement_sect = le64_to_cpup(&p->entries[idx].replacement_sect); | |
11288 | + idx++; | |
11289 | + } | |
11290 | + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt); | |
11291 | + p->sequence_number = le32_to_cpup(&p->sequence_number); | |
11292 | + p->crc = le32_to_cpup(&p->crc); | |
11293 | + p->signature = le32_to_cpup(&p->signature); | |
11294 | +} | |
11295 | + | |
11296 | +/** | |
11297 | + * cpu_bbr_table_sector_to_le | |
11298 | + * Fill @le with the on-disk (LE) image of table sector @p, which holds | |
11299 | + * cpu-endian values. @p is only read; header words and all entries are converted. | |
11300 | + */ | |
11301 | +void cpu_bbr_table_sector_to_le(struct evms_bbr_table * p, | |
11302 | + struct evms_bbr_table * le) | |
11303 | +{ | |
11304 | + int idx = 0; | |
11305 | + /* remap entries first, then the sector header words */ | |
11306 | + while ( idx < EVMS_BBR_ENTRIES_PER_SECT ) { | |
11307 | + le->entries[idx].bad_sect = cpu_to_le64p(&p->entries[idx].bad_sect); | |
11308 | + le->entries[idx].replacement_sect = cpu_to_le64p(&p->entries[idx].replacement_sect); | |
11309 | + idx++; | |
11310 | + } | |
11311 | + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt); | |
11312 | + le->sequence_number = cpu_to_le32p(&p->sequence_number); | |
11313 | + le->crc = cpu_to_le32p(&p->crc); | |
11314 | + le->signature = cpu_to_le32p(&p->signature); | |
11315 | +} | |
11316 | + | |
11317 | +#ifdef EVMS_BBR_DEBUG | |
11318 | +static void print_meta_data(struct evms_bbr_metadata * md) /* debug dump of one metadata sector; values are printed exactly as stored in @md */ | |
11319 | +{ | |
11320 | + LOG_DEBUG("BBR Metadata Sector:\n" | |
11321 | + " signature 0x%08X\n" | |
11322 | + " crc 0x%08X\n" | |
11323 | + " block_size %u\n" | |
11324 | + " start_sect_bbr_table "PFU64"\n" | |
11325 | + " nr_sects_bbr_table "PFU64"\n" | |
11326 | + " start_replacement_sect "PFU64"\n" | |
11327 | + " nr_replacement_blks "PFU64"\n", | |
11328 | + md->signature, md->crc, md->block_size, | |
11329 | + md->start_sect_bbr_table, md->nr_sects_bbr_table, | |
11330 | + md->start_replacement_sect, md->nr_replacement_blks); | |
11331 | +} | |
11332 | + | |
11333 | +static void print_bbr_table_sector(struct evms_bbr_table * p) /* debug dump of one table sector: header words, then every entry slot */ | |
11334 | +{ | |
11335 | + int i; | |
11336 | + LOG_DEBUG("BBR Table Sector:\n" | |
11337 | + " sig 0x%08X\n" | |
11338 | + " crc 0x%08X\n" | |
11339 | + " sequence %u\n" | |
11340 | + " in_use_cnt %u\n" | |
11341 | + " Table Entries:\n", | |
11342 | + p->signature, p->crc, p->sequence_number, p->in_use_cnt); | |
11343 | + for ( i = 0; i < EVMS_BBR_ENTRIES_PER_SECT; i++ ) { /* all slots are printed, not just the first in_use_cnt */ | |
11344 | + LOG_DEBUG(" [%d] bad_sect: "PFU64" replacement_sect: "PFU64"\n", | |
11345 | + i, p->entries[i].bad_sect, | |
11346 | + p->entries[i].replacement_sect); | |
11347 | + } | |
11348 | +} | |
11349 | + | |
11350 | +void print_binary_tree(struct bbr_runtime_remap * node) /* recursive pre-order dump: node, then left subtree, then right subtree */ | |
11351 | +{ | |
11352 | + if (node) { | |
11353 | + LOG_DEFAULT("["PFU64","PFU64"]\n", node->remap.bad_sect, | |
11354 | + node->remap.replacement_sect); | |
11355 | + print_binary_tree(node->left); | |
11356 | + print_binary_tree(node->right); | |
11357 | + } | |
11358 | +} | |
11359 | + | |
11360 | +static void print_remap_list(struct bbr_private * bbr_id) /* dumps the remap tree of @bbr_id; prints nothing when the tree is empty */ | |
11361 | +{ | |
11362 | + if (bbr_id->remap_root) { | |
11363 | + LOG_DEFAULT("%s for %s\n", __FUNCTION__, bbr_id->node->name); | |
11364 | + print_binary_tree(bbr_id->remap_root); | |
11365 | + } | |
11366 | +} | |
11367 | +#endif | |
11368 | + | |
11369 | +/** | |
11370 | + * validate_bbr_table_sector | |
11371 | + * @p: one BBR table sector, still in on-disk (LE) byte order on entry. | |
11372 | + * Checks signature and CRC; returns 0 or -EINVAL. @p is converted to cpu byte order in either case. | |
11373 | + */ | |
11374 | +static int validate_bbr_table_sector(struct evms_bbr_table * p) | |
11375 | +{ | |
11376 | + int org_crc, final_crc; | |
11377 | + int rc = -EINVAL; | |
11378 | + | |
11379 | + if ( le32_to_cpup(&p->signature) != EVMS_BBR_TABLE_SIGNATURE ) { | |
11380 | + LOG_ERROR("BBR table signature doesn't match!\n"); | |
11381 | + LOG_ERROR("Sector has (0x%08X) expected(0x%08X)\n", | |
11382 | + le32_to_cpup(&p->signature), | |
11383 | + EVMS_BBR_TABLE_SIGNATURE); | |
11384 | + } else if (!p->crc) { | |
11385 | + LOG_ERROR("BBR table sector has no CRC!\n"); | |
11386 | + } else { | |
11387 | + /* The check recomputes the CRC with the crc field zeroed, | |
11388 | + * then puts the stored (LE) value back into the sector. */ | |
11389 | + org_crc = le32_to_cpup(&p->crc); | |
11390 | + p->crc = 0; | |
11391 | + final_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, p, | |
11392 | + sizeof(*p)); | |
11393 | + p->crc = cpu_to_le32p(&org_crc); | |
11394 | + if ( final_crc != org_crc ) { | |
11395 | + LOG_ERROR("CRC failed!\n"); | |
11396 | + LOG_ERROR("Sector has (0x%08X) calculated(0x%08X)\n", | |
11397 | + org_crc, final_crc); | |
11398 | + } else { | |
11399 | + rc = 0; | |
11400 | + } | |
11401 | + } | |
11402 | + if (rc) | |
11403 | + BBR_DEBUG_PRINT_TABLE_SECTOR(p); | |
11404 | + /* converted to cpu byte order whether or not validation passed */ | |
11405 | + le_bbr_table_sector_to_cpu(p); | |
11406 | + return rc; | |
11407 | +} | |
11408 | + | |
11409 | +/** | |
11410 | + * update_invalid_bbr_table_sector | |
11411 | + * Overwrite @invalid with @valid in memory, then issue an INIT_IO (flag 1) of a | |
11412 | + * little-endian image of @valid to sector @lsn on @node. Failures are only logged (allocation failure is silent). | |
11413 | + */ | |
11414 | +void update_invalid_bbr_table_sector(struct evms_logical_node * node, | |
11415 | + struct evms_bbr_table * valid, | |
11416 | + struct evms_bbr_table * invalid, | |
11417 | + u64 lsn) | |
11418 | +{ | |
11419 | + struct evms_bbr_table * le_copy; | |
11420 | + | |
11421 | + /* Repair the in-memory copy first. */ | |
11422 | + memcpy(invalid, valid, sizeof(*invalid)); | |
11423 | + | |
11424 | + /* Scratch sector for the little-endian image handed to INIT_IO. */ | |
11425 | + le_copy = kmalloc(sizeof(*le_copy), GFP_KERNEL); | |
11426 | + if (!le_copy) { | |
11427 | + return; | |
11428 | + } | |
11429 | + memset(le_copy, 0, sizeof(*le_copy)); | |
11430 | + cpu_bbr_table_sector_to_le(valid, le_copy); | |
11431 | + | |
11432 | + LOG_WARNING("Correcting BBR table sector "PFU64"\n", lsn); | |
11433 | + if ( INIT_IO(node, 1, lsn, 1, le_copy) ) { | |
11434 | + LOG_ERROR("Could not correct BBR table sector "PFU64".\n", | |
11435 | + lsn); | |
11436 | + } | |
11437 | + kfree(le_copy); | |
11438 | +} | |
11439 | + | |
11440 | +/** | |
11441 | + * validate_bbr_table | |
11442 | + * Validate the sectors of the single BBR table at @p, whose length comes from @md. | |
11443 | + * Returns how many leading sectors validated before the first bad one. | |
11444 | + */ | |
11445 | +static u32 validate_bbr_table(struct evms_bbr_metadata * md, | |
11446 | + struct evms_bbr_table * p) | |
11447 | +{ | |
11448 | + const u32 expected = md->nr_sects_bbr_table; | |
11449 | + u32 good = 0; | |
11450 | + | |
11451 | + /* count leading sectors that validate; stop at the first bad one */ | |
11452 | + while ( good < expected && !validate_bbr_table_sector(&p[good]) ) { | |
11453 | + good++; | |
11454 | + } | |
11455 | + | |
11456 | + if ( good != expected ) { | |
11457 | + LOG_SERIOUS("Stopped BBR table validation at sector %u.\n", good); | |
11458 | + } | |
11459 | + | |
11460 | + LOG_DEBUG("Validated %u BBR table sectors.\n", good); | |
11461 | + | |
11462 | + return good; | |
11463 | +} | |
11464 | + | |
11465 | +/** | |
11466 | + * validate_bbr_tables | |
11467 | + * @node: BBR node to validate. | |
11468 | + * @MD1: Primary metadata sector. | |
11469 | + * @MD2: Secondary metadata sector. | |
11470 | + * @p1: Primary BBR table. | |
11471 | + * @p2: Secondary BBR table. | |
11472 | + * | |
11473 | + * Validate both copies of the BBR table sector by sector. A sector that is invalid, or that has the lower | |
11474 | + * sequence number, is overwritten from the other copy. Returns the number of sectors checked before one was bad in both copies. | |
11475 | + */ | |
11476 | +static u32 validate_bbr_tables(struct evms_logical_node * node, | |
11477 | + struct evms_bbr_metadata * MD1, | |
11478 | + struct evms_bbr_metadata * MD2, | |
11479 | + struct evms_bbr_table * p1, | |
11480 | + struct evms_bbr_table * p2) | |
11481 | +{ | |
11482 | + u32 i, rc1, rc2, nr_sects; /* rc1/rc2 hold int results but are only tested for non-zero */ | |
11483 | + | |
11484 | + nr_sects = MD1->nr_sects_bbr_table; | |
11485 | + if ( nr_sects != MD2->nr_sects_bbr_table ) { /* sizes disagree: only the common prefix is checked */ | |
11486 | + nr_sects = (nr_sects < MD2->nr_sects_bbr_table) ? | |
11487 | + nr_sects : MD2->nr_sects_bbr_table; | |
11488 | + LOG_SERIOUS("Size of BBR tables don't match. Using %u\n", | |
11489 | + nr_sects); | |
11490 | + } | |
11491 | + | |
11492 | + for ( i = 0; i < nr_sects; i++, p1++, p2++ ) { /* p1 and p2 advance in lock step, one sector per pass */ | |
11493 | + rc1 = validate_bbr_table_sector(p1); | |
11494 | + if (rc1) { | |
11495 | + LOG_WARNING("Invalid BBR table sector at "PFU64".\n", | |
11496 | + MD1->start_sect_bbr_table + i); | |
11497 | + } | |
11498 | + rc2 = validate_bbr_table_sector(p2); | |
11499 | + if (rc2) { | |
11500 | + LOG_WARNING("Invalid BBR table sector at "PFU64".\n", | |
11501 | + MD2->start_sect_bbr_table + i); | |
11502 | + } | |
11503 | + | |
11504 | + /* Correct BBR table errors. */ | |
11505 | + if (rc1 && rc2) { | |
11506 | + /* Cannot fix: no good copy of this sector. Validation stops here. */ | |
11507 | + break; | |
11508 | + } else if (rc1) { | |
11509 | + update_invalid_bbr_table_sector(node, p2, p1, | |
11510 | + MD1->start_sect_bbr_table + i); | |
11511 | + continue; /* p1 now equals p2, so the sequence check below is skipped */ | |
11512 | + } else if (rc2) { | |
11513 | + update_invalid_bbr_table_sector(node, p1, p2, | |
11514 | + MD2->start_sect_bbr_table + i); | |
11515 | + continue; /* p2 now equals p1, so the sequence check below is skipped */ | |
11516 | + } | |
11517 | + | |
11518 | + if ( p1->sequence_number != p2->sequence_number ) { /* both valid but out of step */ | |
11519 | + LOG_WARNING("Sequence numbers for BBR table index %u don't match.\n", i); | |
11520 | + LOG_WARNING("MD1 sequence_nr=%u, MD2 sequence_nr_2=%u\n", | |
11521 | + p1->sequence_number, p2->sequence_number); | |
11522 | + if ( p1->sequence_number < p2->sequence_number ) { /* the higher sequence number wins */ | |
11523 | + update_invalid_bbr_table_sector(node, p2, p1, | |
11524 | + MD1->start_sect_bbr_table + i); | |
11525 | + } else { | |
11526 | + update_invalid_bbr_table_sector(node, p1, p2, | |
11527 | + MD2->start_sect_bbr_table + i); | |
11528 | + } | |
11529 | + } | |
11530 | + } | |
11531 | + if ( i != nr_sects ) { /* loop ended early via the break above */ | |
11532 | + LOG_SERIOUS("Stopped validation at sector %u\n", i); | |
11533 | + nr_sects = i; | |
11534 | + } | |
11535 | + LOG_DEBUG("Validated %u BBR table sectors.\n", nr_sects); | |
11536 | + return nr_sects; | |
11537 | +} | |
11538 | + | |
11539 | +/** | |
11540 | + * validate_meta_data | |
11541 | + * @md: BBR metadata sector, still in on-disk (LE) byte order on entry. | |
11542 | + * Checks the signature and, if a CRC is present, the CRC. Returns 0 with @md converted to cpu byte order, or -EINVAL. | |
11543 | + */ | |
11544 | +static int validate_meta_data(struct evms_bbr_metadata * md) | |
11545 | +{ | |
11546 | + int stored_crc, computed_crc; | |
11547 | + | |
11548 | + BBR_DEBUG_PRINT_META_DATA(md); | |
11549 | + | |
11550 | + if ( le32_to_cpup(&md->signature) != EVMS_BBR_SIGNATURE ) { | |
11551 | + LOG_SERIOUS("BBR signature doesn't match!\n"); | |
11552 | + LOG_SERIOUS("Found: 0x%08X Expecting: 0x%08X\n", | |
11553 | + le32_to_cpup(&md->signature), EVMS_BBR_SIGNATURE); | |
11554 | + return -EINVAL; | |
11555 | + } | |
11556 | + | |
11557 | + if (!md->crc) { | |
11558 | + LOG_WARNING("Metadata sector has no CRC!\n"); | |
11559 | + } else { | |
11560 | + stored_crc = le32_to_cpup(&md->crc); | |
11561 | + md->crc = 0; | |
11562 | + computed_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, md, | |
11563 | + sizeof(*md)); | |
11564 | + if ( computed_crc != stored_crc ) { | |
11565 | + LOG_ERROR("CRC failed!\n"); | |
11566 | + LOG_ERROR("Sector has (0x%08X) calculated(0x%08X)\n", | |
11567 | + stored_crc, computed_crc); | |
11568 | + return -EINVAL; | |
11569 | + } | |
11570 | + md->crc = cpu_to_le32p(&stored_crc); | |
11571 | + } | |
11572 | + | |
11573 | + le_meta_data_to_cpu(md); | |
11574 | + return 0; | |
11575 | +} | |
11576 | + | |
11577 | +/** | |
11578 | + * load_meta_data | |
11579 | + * @node: BBR node to read metadata from. | |
11580 | + * @lsn: Sector to read metadata from; 0 means none was specified (-ENODATA). | |
11581 | + * @md: Pointer to return metadata structure (set to NULL on failure). | |
11582 | + * @bbr_table: Pointer to return BBR table (set to NULL on failure). | |
11583 | + * | |
11584 | + * Load and validate one copy of the BBR metadata, then read (without validating) the BBR table | |
11585 | + * it describes. Returns 0 on success; otherwise a non-zero error code with both outputs NULL. | |
11586 | + */ | |
11587 | +static int load_meta_data(struct evms_logical_node * node, | |
11588 | + u64 lsn, | |
11589 | + struct evms_bbr_metadata ** md, | |
11590 | + struct evms_bbr_table ** bbr_table) | |
11591 | +{ | |
11592 | + u64 nr_sects, table_bytes; | |
11593 | + int rc; | |
11594 | + | |
11595 | + *md = NULL; | |
11596 | + *bbr_table = NULL; | |
11597 | + if (!lsn) { | |
11598 | + LOG_WARNING("No sector specified for BBR metadata on %s.\n", | |
11599 | + node->name); | |
11600 | + return -ENODATA; | |
11601 | + } | |
11602 | + /* Allocate, read and validate the metadata sector. */ | |
11603 | + *md = kmalloc(sizeof(struct evms_bbr_metadata), GFP_KERNEL); | |
11604 | + if (!*md) { | |
11605 | + LOG_ERROR("kmalloc error creating metadata buffer for %s.\n", | |
11606 | + node->name); | |
11607 | + return -ENOMEM; | |
11608 | + } | |
11609 | + rc = INIT_IO(node, 0, lsn, 1, *md); | |
11610 | + if (rc) { | |
11611 | + LOG_ERROR("init_io error on %s.\n", node->name); | |
11612 | + goto out_free; | |
11613 | + } | |
11614 | + rc = validate_meta_data(*md); | |
11615 | + if (rc) { | |
11616 | + LOG_ERROR("Error validating metadata for %s.\n", node->name); | |
11617 | + goto out_free; | |
11618 | + } | |
11619 | + | |
11620 | + /* The table size comes from disk: refuse an empty table, or a byte | |
11621 | + * count that overflows u64 or does not fit in a size_t. */ | |
11622 | + nr_sects = (*md)->nr_sects_bbr_table; | |
11623 | + table_bytes = nr_sects << EVMS_VSECTOR_SIZE_SHIFT; | |
11624 | + if ( !nr_sects || table_bytes != (size_t)table_bytes || | |
11625 | + (table_bytes >> EVMS_VSECTOR_SIZE_SHIFT) != nr_sects ) { | |
11626 | + LOG_ERROR("Invalid BBR table size on %s.\n", node->name); | |
11627 | + rc = -EINVAL; | |
11628 | + goto out_free; | |
11629 | + } | |
11630 | + | |
11631 | + /* Allocate a buffer for the BBR table. */ | |
11632 | + *bbr_table = kmalloc((size_t)table_bytes, GFP_KERNEL); | |
11633 | + if (!*bbr_table) { | |
11634 | + LOG_ERROR("kmalloc error creating BBR table buffer for %s.\n", | |
11635 | + node->name); | |
11636 | + rc = -ENOMEM; | |
11637 | + goto out_free; | |
11638 | + } | |
11639 | + | |
11640 | + /* Read the BBR table but don't validate here. */ | |
11641 | + rc = INIT_IO(node, 0, (*md)->start_sect_bbr_table, nr_sects, *bbr_table); | |
11642 | + if (!rc) { | |
11643 | + return 0; | |
11644 | + } | |
11645 | + LOG_ERROR("init_io error on %s.\n", node->name); | |
11646 | +out_free: | |
11647 | + kfree(*bbr_table); /* still NULL when the failure came before the table allocation */ | |
11648 | + *bbr_table = NULL; | |
11649 | + kfree(*md); | |
11650 | + *md = NULL; | |
11651 | + return rc; | |
11652 | +} | |
11653 | + | |
11654 | +/** | |
11655 | + * load_feature_data | |
11656 | + * @node: BBR node | |
11657 | + * @ID: Return pointer to BBR private data (NULL on failure). | |
11658 | + * | |
11659 | + * Load both copies of the BBR metadata and table. If one is invalid, try | |
11660 | + * to correct it using the valid copy. When a valid copy is found, create | |
11661 | + * a private data structure for the specified node. Returns 0 or a negative error code. | |
11662 | + */ | |
11663 | +static int load_feature_data(struct evms_logical_node * node, | |
11664 | + struct bbr_private ** ID) | |
11665 | +{ | |
11666 | + struct evms_bbr_metadata * md1 = NULL; | |
11667 | + struct evms_bbr_metadata * md2 = NULL; | |
11668 | + struct evms_bbr_table * table1 = NULL; | |
11669 | + struct evms_bbr_table * table2 = NULL; | |
11670 | + u64 lba_table1 = 0, lba_table2 = 0; /* stay 0 for a copy that could not be loaded */ | |
11671 | + u32 nr_sects = 0; | |
11672 | + int rc = 0, rc1, rc2; | |
11673 | + | |
11674 | + *ID = NULL; | |
11675 | + | |
11676 | + /* Load metadata 1 */ | |
11677 | + rc1 = load_meta_data(node, | |
11678 | + node->feature_header->feature_data1_start_lsn, | |
11679 | + &md1, &table1); | |
11680 | + /* Load metadata 2 */ | |
11681 | + rc2 = load_meta_data(node, | |
11682 | + node->feature_header->feature_data2_start_lsn, | |
11683 | + &md2, &table2); | |
11684 | + | |
11685 | + if (rc1 && rc2) { | |
11686 | + /* Both copies are bad? Cannot continue. */ | |
11687 | + rc = -ENODATA; | |
11688 | + } else if (rc1 || rc2) { | |
11689 | + /* One copy is bad. Use the good copy, always through md1/table1. */ | |
11690 | + if (rc1) { | |
11691 | + lba_table2 = md2->start_sect_bbr_table; | |
11692 | + kfree(table1); /* load_meta_data() left table1 and md1 NULL on failure */ | |
11693 | + kfree(md1); | |
11694 | + table1 = table2; | |
11695 | + table2 = NULL; | |
11696 | + md1 = md2; | |
11697 | + md2 = NULL; | |
11698 | + } else { | |
11699 | + lba_table1 = md1->start_sect_bbr_table; | |
11700 | + } | |
11701 | + | |
11702 | + nr_sects = validate_bbr_table(md1, table1); | |
11703 | + if ( nr_sects == 0 ) { | |
11704 | + rc = -ENODATA; | |
11705 | + } | |
11706 | + } else { /* both copies loaded: cross-validate and repair */ | |
11707 | + lba_table1 = md1->start_sect_bbr_table; | |
11708 | + lba_table2 = md2->start_sect_bbr_table; | |
11709 | + nr_sects = validate_bbr_tables(node, md1, md2, table1, table2); | |
11710 | + if ( nr_sects == 0 ) { | |
11711 | + rc = -ENODATA; | |
11712 | + } | |
11713 | + } | |
11714 | + | |
11715 | + if (!rc && nr_sects) { | |
11716 | + *ID = kmalloc(sizeof(struct bbr_private), GFP_KERNEL); | |
11717 | + if (*ID) { | |
11718 | + memset(*ID, 0, sizeof(struct bbr_private)); | |
11719 | + (*ID)->source = node; | |
11720 | + (*ID)->blksize_in_sects = md1->block_size >> | |
11721 | + EVMS_VSECTOR_SIZE_SHIFT; | |
11722 | + (*ID)->remap_root = NULL; | |
11723 | + (*ID)->lba_table1 = lba_table1; | |
11724 | + (*ID)->lba_table2 = lba_table2; | |
11725 | + (*ID)->bbr_table = table1; /* on success the private data keeps table1; it is not freed below */ | |
11726 | + (*ID)->nr_sects_bbr_table = nr_sects; | |
11727 | + if ( nr_sects < md1->nr_sects_bbr_table ) { /* only a prefix of the table validated */ | |
11728 | + LOG_WARNING("Making BBR node read-only\n"); | |
11729 | + (*ID)->flag |= EVMS_VOLUME_READ_ONLY; | |
11730 | + } | |
11731 | + (*ID)->nr_replacement_blks = nr_sects * | |
11732 | + EVMS_BBR_ENTRIES_PER_SECT; | |
11733 | + (*ID)->start_replacement_sect = md1->start_replacement_sect; | |
11734 | + (*ID)->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0); | |
11735 | + (*ID)->bbr_id_lock = SPIN_LOCK_UNLOCKED; | |
11736 | + if ( !bbr_remap_pool || !bbr_io_buf_pool ) { /* global pools are created on first use */ | |
11737 | + rc = bbr_create_pools(); | |
11738 | + } | |
11739 | + if (!rc) { | |
11740 | + atomic_set(&(*ID)->in_use_replacement_blks, | |
11741 | + bbr_table_to_remap_list(*ID)); | |
11742 | + } | |
11743 | + } else { | |
11744 | + rc = -ENOMEM; | |
11745 | + } | |
11746 | + } | |
11747 | + | |
11748 | + if (!rc) { | |
11749 | + if (!bbr_io_thread) { /* one shared I/O thread, registered on first successful load */ | |
11750 | + const char * name = "evms_bbr_io"; | |
11751 | + bbr_io_thread = evms_cs_register_thread(bbr_io_handler, | |
11752 | + NULL, name); | |
11753 | + if (!bbr_io_thread) { | |
11754 | + rc = -EINVAL; | |
11755 | + } | |
11756 | + } | |
11757 | + } | |
11758 | + | |
11759 | + /* If error, free table1 and any private data created above. */ | |
11760 | + if (rc) { | |
11761 | + if (table1) { | |
11762 | + kfree(table1); | |
11763 | + } | |
11764 | + if (*ID) { | |
11765 | + (*ID)->bbr_table = NULL; /* table1 was freed just above; presumably keeps bbr_free_private() from freeing it again */ | |
11766 | + bbr_free_private(*ID); | |
11767 | + (*ID) = NULL; | |
11768 | + } | |
11769 | + } | |
11770 | + | |
11771 | + /* Will never use md1, md2 and table2 again */ | |
11772 | + if (md1) { | |
11773 | + kfree(md1); | |
11774 | + } | |
11775 | + if (md2) { | |
11776 | + kfree(md2); | |
11777 | + } | |
11778 | + if (table2) { | |
11779 | + kfree(table2); | |
11780 | + } | |
11781 | + | |
11782 | + return rc; | |
11783 | +} | |
11784 | + | |
11785 | +/** | |
11786 | + * bbr_binary_tree_insert | |
11787 | + * Link @newnode as a leaf of the unbalanced search tree at *@root, keyed on | |
11788 | + * remap.bad_sect: keys greater than a node go right, all others go left. | |
11789 | + */ | |
11790 | +void bbr_binary_tree_insert(struct bbr_runtime_remap ** root, | |
11791 | + struct bbr_runtime_remap * newnode) | |
11792 | +{ | |
11793 | + struct bbr_runtime_remap ** node = root; | |
11794 | + /* No tree slot to link into, or nothing to link: do nothing. */ | |
11795 | + if (!root || !newnode) { | |
11796 | + return; | |
11797 | + } | |
11798 | + while (*node) { | |
11799 | + node = ( newnode->remap.bad_sect > (*node)->remap.bad_sect ) ? | |
11800 | + &((*node)->right) : &((*node)->left); | |
11801 | + } | |
11802 | + newnode->left = newnode->right = NULL; | |
11803 | + *node = newnode; /* node points at the empty child slot (or the root slot) found above */ | |
11804 | +} | |
11805 | + | |
11806 | +/** | |
11807 | + * bbr_binary_search | |
11808 | + * | |
11809 | + * Return the tree node under @root whose remap.bad_sect equals @lsn, or NULL if there is none. | |
11810 | + */ | |
11811 | +struct bbr_runtime_remap * bbr_binary_search(struct bbr_runtime_remap * root, | |
11812 | + u64 lsn) | |
11813 | +{ | |
11814 | + struct bbr_runtime_remap * cur; | |
11815 | + | |
11816 | + /* descend until the key matches or we step off a leaf */ | |
11817 | + for ( cur = root; cur; ) { | |
11818 | + if ( cur->remap.bad_sect == lsn ) { | |
11819 | + return cur; | |
11820 | + } | |
11821 | + cur = ( lsn > cur->remap.bad_sect ) ? cur->right : cur->left; | |
11822 | + } | |
11823 | + | |
11824 | + /* no node carries this sector */ | |
11825 | + return NULL; | |
11826 | +} | |
11827 | + | |
11828 | +/** | |
11829 | + * bbr_binary_tree_destroy | |
11830 | + * | |
11831 | + * Destroy the binary tree under @root without recursion: repeatedly walk down to a leaf, return it to bbr_remap_pool, unlink it, restart at @root. | |
11832 | + */ | |
11833 | +void bbr_binary_tree_destroy(struct bbr_runtime_remap * root, | |
11834 | + struct bbr_private * bbr_id) /* NOTE(review): bbr_id is not used in this function */ | |
11835 | +{ | |
11836 | + struct bbr_runtime_remap ** link = NULL; /* parent's pointer to the node currently being visited */ | |
11837 | + struct bbr_runtime_remap * node = root; | |
11838 | + | |
11839 | + while (node) { | |
11840 | + if (node->left) { | |
11841 | + link = &(node->left); | |
11842 | + node = node->left; | |
11843 | + continue; | |
11844 | + } | |
11845 | + if (node->right) { | |
11846 | + link = &(node->right); | |
11847 | + node = node->right; | |
11848 | + continue; | |
11849 | + } | |
11850 | + | |
11851 | + mempool_free(node, bbr_remap_pool); /* node is a leaf here: both children are NULL */ | |
11852 | + if (node == root) { /* only the pointer value is compared; the freed node is not dereferenced */ | |
11853 | + /* If root is deleted, we're done. */ | |
11854 | + break; | |
11855 | + } | |
11856 | + | |
11857 | + /* Back to root. */ | |
11858 | + node = root; | |
11859 | + *link = NULL; /* clear the parent's pointer to the leaf just freed */ | |
11860 | + } | |
11861 | +} | |
11862 | + | |
11863 | +static void bbr_free_remap(struct bbr_private * bbr_id) | |
11864 | +{ /* Free every node of bbr_id's remap tree and reset remap_root, all under bbr_id_lock. */ | |
11865 | + unsigned long flags; | |
11866 | + spin_lock_irqsave(&bbr_id->bbr_id_lock, flags); | |
11867 | + bbr_binary_tree_destroy(bbr_id->remap_root, bbr_id); | |
11868 | + bbr_id->remap_root = NULL; /* the tree is gone; leave no dangling root pointer */ | |
11869 | + spin_unlock_irqrestore(&bbr_id->bbr_id_lock, flags); | |
11870 | +} | |
11871 | + | |
11872 | +/** | |
11873 | + * bbr_insert_remap_entry | |
11874 | + * | |
11875 | + * Allocate a runtime remap node from bbr_remap_pool, copy the bad/replacement sector pair from new_bbr_entry into it and insert it into this node's binary tree under bbr_id_lock. Returns 0, or -ENOMEM if the pool allocation fails. | |
11876 | + */ | |
11877 | +static int bbr_insert_remap_entry(struct bbr_private * bbr_id, | |
11878 | + struct evms_bbr_table_entry * new_bbr_entry) | |
11879 | +{ | |
11880 | + struct bbr_runtime_remap * entry; | |
11881 | + unsigned long irq_flags; | |
11882 | + int rc = 0; | |
11883 | + | |
11884 | + entry = mempool_alloc(bbr_remap_pool, GFP_NOIO); | |
11885 | + if (entry) { | |
11886 | + entry->remap.bad_sect = new_bbr_entry->bad_sect; | |
11887 | + entry->remap.replacement_sect = new_bbr_entry->replacement_sect; | |
11888 | + spin_lock_irqsave(&bbr_id->bbr_id_lock, irq_flags); | |
11889 | + bbr_binary_tree_insert(&bbr_id->remap_root, entry); | |
11890 | + spin_unlock_irqrestore(&bbr_id->bbr_id_lock, irq_flags); | |
11891 | + } else { | |
11892 | + rc = -ENOMEM; | |
11893 | + LOG_SERIOUS("Could not allocate from remap pool! (rc=%d)\n", rc); | |
11894 | + } | |
11895 | + return rc; | |
11896 | +} | |
11897 | + | |
11898 | +/** | |
11899 | + * bbr_table_to_remap_list | |
11900 | + * | |
11901 | + * The on-disk bbr table is ordered by replacement sector LBA, whereas run-time | |
11902 | + * lookups are keyed on the bad sector LBA. Called at discovery time, this | |
11903 | + * function feeds every in-use table entry into the in-memory remap tree and | |
11904 | + * returns the sum of the per-sector in_use_cnt values. It assumes that at | |
11905 | + * least one copy of the meta data is valid. | |
11906 | + */ | |
11907 | +static u32 bbr_table_to_remap_list(struct bbr_private * bbr_id) | |
11908 | +{ | |
11909 | + struct evms_bbr_table * sect; | |
11910 | + u32 total = 0; | |
11911 | + int s, e; | |
11912 | + | |
11913 | + /* Walk the table one sector at a time; a sector with no entries ends the scan. */ | |
11914 | + for ( s = 0; s < bbr_id->nr_sects_bbr_table; s++ ) { | |
11915 | + sect = &bbr_id->bbr_table[s]; | |
11916 | + if (sect->in_use_cnt == 0) { | |
11917 | + break; | |
11918 | + } | |
11919 | + for ( e = 0; e < sect->in_use_cnt; e++ ) { | |
11920 | + bbr_insert_remap_entry(bbr_id, &sect->entries[e]); | |
11921 | + } | |
11922 | + /* The insert result is not checked; the sector's full count is added regardless. */ | |
11923 | + total += sect->in_use_cnt; | |
11924 | + } | |
11925 | + | |
11926 | + return total; | |
11927 | +} | |
11928 | + | |
11929 | +/** | |
11930 | + * bbr_search_remap_entry | |
11931 | + * | |
11932 | + * Look up lsn in this node's remap tree while holding bbr_id_lock. Returns a | |
11933 | + * pointer to the matching table entry, or NULL when lsn has not been remapped. | |
11934 | + */ | |
11935 | +static struct evms_bbr_table_entry * bbr_search_remap_entry(struct bbr_private * bbr_id, | |
11936 | + u64 lsn) | |
11937 | +{ | |
11938 | + struct bbr_runtime_remap * hit; | |
11939 | + unsigned long irq_flags; | |
11940 | + | |
11941 | + spin_lock_irqsave(&bbr_id->bbr_id_lock, irq_flags); | |
11942 | + hit = bbr_binary_search(bbr_id->remap_root, lsn); | |
11943 | + spin_unlock_irqrestore(&bbr_id->bbr_id_lock, irq_flags); | |
11944 | + if (!hit) { | |
11945 | + return NULL; | |
11946 | + } | |
11947 | + /* The lock covers only the tree walk; the entry pointer is handed back unlocked. */ | |
11948 | + return (&hit->remap); | |
11949 | +} | |
11950 | + | |
11951 | +/** | |
11952 | + * bbr_remap | |
11953 | + * | |
11954 | + * Translate *lsn through the remap table. Returns TRUE and rewrites *lsn with the replacement sector when an entry exists; | |
11955 | + * returns FALSE and leaves *lsn alone when there are no remaps, BBR_STOP_REMAP is set, or no entry matches. | |
11956 | + */ | |
11957 | +static inline int bbr_remap(struct bbr_private * bbr_id, | |
11958 | + u64 * lsn) | |
11959 | +{ | |
11960 | + struct evms_bbr_table_entry * entry; | |
11961 | + | |
11962 | + if ( ! atomic_read(&bbr_id->in_use_replacement_blks) || | |
11963 | + (bbr_id->flag & BBR_STOP_REMAP) ) { | |
11964 | + return FALSE; | |
11965 | + } | |
11966 | + entry = bbr_search_remap_entry(bbr_id, *lsn); | |
11967 | + if (!entry) { | |
11968 | + return FALSE; | |
11969 | + } | |
11970 | + *lsn = entry->replacement_sect; | |
11971 | + LOG_EXTRA("%s replacement sector (LSN="PFU64")\n", __FUNCTION__, *lsn); | |
11972 | + return TRUE; | |
11973 | +} | |
11974 | + | |
11975 | +/** | |
11976 | + * bbr_remap_probe | |
11977 | + * | |
11978 | + * Probe the range [lsn, lsn+nr_sects) in steps of blksize_in_sects. Return TRUE as soon as | |
11979 | + * one probed sector is found in the remap table, otherwise return FALSE. | |
11980 | + */ | |
11981 | +static inline int bbr_remap_probe(struct bbr_private * bbr_id, | |
11982 | + u64 lsn, u64 nr_sects) | |
11983 | +{ | |
11984 | + u64 probe, offset; | |
11985 | + | |
11986 | + if ( ! atomic_read(&bbr_id->in_use_replacement_blks) || | |
11987 | + (bbr_id->flag & BBR_STOP_REMAP) ) { | |
11988 | + return FALSE; | |
11989 | + } | |
11990 | + for ( offset = 0; offset < nr_sects; offset += bbr_id->blksize_in_sects ) { | |
11991 | + probe = lsn + offset; | |
11992 | + if ( bbr_remap(bbr_id, &probe) ) { | |
11993 | + return TRUE; | |
11994 | + } | |
11995 | + } | |
11996 | + return FALSE; | |
11997 | +} | |
11998 | + | |
11999 | +static void *bbr_slab_pool_alloc(int gfp_mask, void * data) | |
12000 | +{ /* Allocation hook handed to mempool_create(): data is the slab cache to allocate one object from. */ | |
12001 | + return kmem_cache_alloc(data, gfp_mask); | |
12002 | +} | |
12003 | + | |
12004 | +static void bbr_slab_pool_free(void *ptr, void * data) | |
12005 | +{ /* Free hook handed to mempool_create(): returns ptr to the slab cache passed as data. */ | |
12006 | + kmem_cache_free(data, ptr); | |
12007 | +} | |
12008 | + | |
12009 | +static int bbr_create_pools(void) | |
12010 | +{ /* Create whichever of the four global slab caches / mempools do not exist yet. Always returns 0: any creation failure panics instead of returning an error. */ | |
12011 | + /* Create a memory pool for the remap list. The slab cache comes first because the mempool below allocates from it. */ | |
12012 | + if (!bbr_remap_slab) { | |
12013 | + bbr_remap_slab = kmem_cache_create("BBR_Remap_Slab", | |
12014 | + sizeof(struct bbr_runtime_remap), | |
12015 | + 0, SLAB_HWCACHE_ALIGN, | |
12016 | + NULL, NULL); | |
12017 | + if (!bbr_remap_slab) { | |
12018 | + panic("Unable to create BBR remap cache."); | |
12019 | + } | |
12020 | + } | |
12021 | + if (!bbr_remap_pool) { | |
12022 | + bbr_remap_pool = mempool_create(64, bbr_slab_pool_alloc, | |
12023 | + bbr_slab_pool_free, | |
12024 | + bbr_remap_slab); | |
12025 | + if (!bbr_remap_pool) { | |
12026 | + panic("Unable to create BBR remap pool."); | |
12027 | + } | |
12028 | + } | |
12029 | + | |
12030 | + /* Create a memory pool for the BBR I/O anchors (struct bbr_io_buffer), again slab cache first, then the mempool on top of it. */ | |
12031 | + if (!bbr_io_buf_slab) { | |
12032 | + bbr_io_buf_slab = kmem_cache_create("BBR_IO_Buf_Slab", | |
12033 | + sizeof(struct bbr_io_buffer), | |
12034 | + 0, SLAB_HWCACHE_ALIGN, | |
12035 | + NULL, NULL); | |
12036 | + if (!bbr_io_buf_slab) { | |
12037 | + panic("Unable to create BBR I/O buffer cache."); | |
12038 | + } | |
12039 | + } | |
12040 | + if (!bbr_io_buf_pool) { | |
12041 | + bbr_io_buf_pool = mempool_create(256, bbr_slab_pool_alloc, | |
12042 | + bbr_slab_pool_free, | |
12043 | + bbr_io_buf_slab); | |
12044 | + if (!bbr_io_buf_pool) { | |
12045 | + panic("Unable to create BBR I/O buffer pool."); | |
12046 | + } | |
12047 | + } | |
12048 | + | |
12049 | + return 0; | |
12050 | +} | |
12051 | + | |
12052 | +static void bbr_destroy_pools(void) | |
12053 | +{ /* Tear down the global pools and caches that exist, each mempool before the slab cache it draws from, and reset the globals to NULL so bbr_create_pools() can recreate them. */ | |
12054 | + if (bbr_io_buf_pool) { | |
12055 | + mempool_destroy(bbr_io_buf_pool); | |
12056 | + bbr_io_buf_pool = NULL; | |
12057 | + } | |
12058 | + if (bbr_io_buf_slab) { | |
12059 | + kmem_cache_destroy(bbr_io_buf_slab); | |
12060 | + bbr_io_buf_slab = NULL; | |
12061 | + } | |
12062 | + if (bbr_remap_pool) { | |
12063 | + mempool_destroy(bbr_remap_pool); | |
12064 | + bbr_remap_pool = NULL; | |
12065 | + } | |
12066 | + if (bbr_remap_slab) { | |
12067 | + kmem_cache_destroy(bbr_remap_slab); | |
12068 | + bbr_remap_slab = NULL; | |
12069 | + } | |
12070 | +} | |
12071 | + | |
12072 | +/** | |
12073 | + * bbr_discover | |
12074 | + * | |
12075 | + * Search through the discover list looking for object with BBR metadata. | |
12076 | + * Remove them from the list and replace with a new BBR node. Nodes whose metadata fails to load are removed and deleted. Returns the rc left by the last processed node (0 unless a logical-node allocation failed last). | |
12077 | + */ | |
12078 | +static int bbr_discover(struct evms_logical_node ** discover_list) | |
12079 | +{ | |
12080 | + struct evms_logical_node * node, * next_node; | |
12081 | + struct evms_logical_node * bbr_node = NULL; | |
12082 | + struct bbr_private * bbr_id; | |
12083 | + int bad_blocks, rc = 0; | |
12084 | + | |
12085 | + MOD_INC_USE_COUNT; /* held for the duration of the scan; dropped again just before returning */ | |
12086 | + | |
12087 | + next_node = *discover_list; | |
12088 | + while (next_node) { | |
12089 | + node = next_node; | |
12090 | + next_node = node->next; /* fetched up front because node may be unlinked or deleted below */ | |
12091 | + | |
12092 | + /* The node must have a BBR feature-header. */ | |
12093 | + if ( ! node->feature_header || | |
12094 | + node->feature_header->feature_id != plugin_header.id ) { | |
12095 | + continue; | |
12096 | + } | |
12097 | + | |
12098 | + rc = load_feature_data(node, &bbr_id); | |
12099 | + if (rc) { | |
12100 | + /* Error loading feature data. | |
12101 | + * This node belongs to us, but metadata is invalid, | |
12102 | + * - remove it from the discovery list | |
12103 | + * - delete it | |
12104 | + * - clear error code then continue. | |
12105 | + * Will consider creating a read only BBR node in | |
12106 | + * the future. | |
12107 | + */ | |
12108 | + LOG_SERIOUS("Error in node (%s) with "PFU64" sectors.\n", | |
12109 | + node->name, node->total_vsectors); | |
12110 | + evms_cs_remove_logical_node_from_list(discover_list, | |
12111 | + node); | |
12112 | + DELETE(node); | |
12113 | + rc = 0; | |
12114 | + continue; | |
12115 | + } | |
12116 | + | |
12117 | + rc = evms_cs_allocate_logical_node(&bbr_node); | |
12118 | + if (rc) { | |
12119 | + LOG_SERIOUS("Could not allocate logical node! rc=%d\n", rc); | |
12120 | + bbr_free_private(bbr_id); | |
12121 | + continue; /* NOTE(review): node stays on the discover list and rc stays non-zero unless a later iteration overwrites it */ | |
12122 | + } | |
12123 | + | |
12124 | + MOD_INC_USE_COUNT; /* one reference per BBR node created; bbr_delete() drops it */ | |
12125 | + bbr_node->volume_info = node->volume_info; | |
12126 | + bbr_node->flags |= node->flags; | |
12127 | + bbr_node->plugin = &plugin_header; | |
12128 | + strcpy(bbr_node->name, /* NOTE(review): unbounded copy of a name read from the feature header; confirm it always fits bbr_node->name */ | |
12129 | + node->feature_header->object_name); | |
12130 | + bbr_node->hardsector_size = node->hardsector_size; | |
12131 | + bbr_node->total_vsectors = node->total_vsectors - 2 - /* 2 sectors presumably hold the feature-header copies; TODO confirm */ | |
12132 | + node->feature_header->feature_data1_size - | |
12133 | + node->feature_header->feature_data2_size; | |
12134 | + bbr_node->block_size = node->block_size; | |
12135 | + bbr_node->private = bbr_id; | |
12136 | + bbr_id->node = bbr_node; | |
12137 | + | |
12138 | + /* Free the feature header now that everything needed from it has been copied, then take the consumed node off the list. */ | |
12139 | + kfree(node->feature_header); | |
12140 | + node->feature_header = NULL; | |
12141 | + evms_cs_remove_logical_node_from_list(discover_list, node); | |
12142 | + | |
12143 | + /* If bad blocks exist, give warning */ | |
12144 | + bad_blocks = atomic_read(&bbr_id->in_use_replacement_blks); | |
12145 | + if (bad_blocks) { | |
12146 | + BBR_DEBUG_PRINT_REMAP_LIST(bbr_id); | |
12147 | + LOG_WARNING("%s has %d bad blocks.\n", | |
12148 | + bbr_id->source->name, bad_blocks); | |
12149 | + LOG_WARNING("There are "PFU64" total replacement blocks.\n", | |
12150 | + bbr_id->nr_replacement_blks); | |
12151 | + LOG_WARNING("There are "PFU64" remaining replacement blocks.\n", | |
12152 | + bbr_id->nr_replacement_blks - | |
12153 | + bad_blocks); | |
12154 | + } | |
12155 | + | |
12156 | + evms_cs_add_logical_node_to_list(discover_list, bbr_node); | |
12157 | + bbr_list_add(bbr_id); | |
12158 | + } | |
12159 | + | |
12160 | + MOD_DEC_USE_COUNT; | |
12161 | + return rc; | |
12162 | +} | |
12163 | + | |
12164 | +static inline void bbr_list_add(struct bbr_private * bbr_id) | |
12165 | +{ /* Push bbr_id onto the head of the global, singly linked bbr_instances list. No lock is taken here. */ | |
12166 | + bbr_id->next = bbr_instances; | |
12167 | + bbr_instances = bbr_id; | |
12168 | +} | |
12169 | + | |
12170 | +static void bbr_list_remove(struct bbr_private * bbr_id) | |
12171 | +{ /* Unlink bbr_id from the global bbr_instances list; does nothing if it is not on the list. */ | |
12172 | + struct bbr_private ** link = &bbr_instances; | |
12173 | + | |
12174 | + while ( *link && *link != bbr_id ) { | |
12175 | + link = &(*link)->next; | |
12176 | + } | |
12177 | + if ( *link ) { | |
12178 | + *link = bbr_id->next; | |
12179 | + } | |
12180 | +} | |
12181 | + | |
12182 | +static struct bbr_private * bbr_find_private(char * object_name) | |
12183 | +{ /* Return the first BBR instance whose node name matches object_name over EVMS_VOLUME_NAME_SIZE characters, or NULL. */ | |
12184 | + struct bbr_private * cur = bbr_instances; | |
12185 | + | |
12186 | + while ( cur ) { | |
12187 | + if ( strncmp(cur->node->name, object_name, EVMS_VOLUME_NAME_SIZE) == 0 ) { | |
12188 | + break; | |
12189 | + } | |
12190 | + cur = cur->next; | |
12191 | + } | |
12192 | + return cur; | |
12193 | +} | |
12194 | + | |
12195 | +static void bbr_free_private(struct bbr_private * bbr_id) | |
12196 | +{ /* Release everything owned by bbr_id: the remap tree, the in-memory bbr table, its slot on bbr_instances, and finally the structure itself. */ | |
12197 | + if (bbr_id->remap_root) { | |
12198 | + bbr_free_remap(bbr_id); | |
12199 | + } | |
12200 | + if (bbr_id->bbr_table) { | |
12201 | + kfree(bbr_id->bbr_table); | |
12202 | + } | |
12203 | + bbr_list_remove(bbr_id); /* harmless when bbr_id was never added to the list */ | |
12204 | + kfree(bbr_id); | |
12205 | +} | |
12206 | + | |
12207 | +/** | |
12208 | + * bbr_delete | |
12209 | + * | |
12210 | + * Delete the specified BBR node and the node it is built on. If the last BBR | |
12211 | + * node is deleted, destroy the memory pools and shut down the I/O thread. Returns the rc of deleting the source node; on failure nothing is freed. | |
12212 | + */ | |
12213 | +static int bbr_delete(struct evms_logical_node * bbr_node) | |
12214 | +{ | |
12215 | + struct bbr_private * bbr_id; | |
12216 | + int rc; | |
12217 | + | |
12218 | + bbr_id = bbr_node->private; | |
12219 | + | |
12220 | + rc = DELETE(bbr_id->source); | |
12221 | + if (!rc) { | |
12222 | + /* Now cleanup and go away */ | |
12223 | + bbr_free_private(bbr_id); | |
12224 | + evms_cs_deallocate_logical_node(bbr_node); | |
12225 | + if (!bbr_instances) { | |
12226 | + bbr_destroy_pools(); /* NOTE(review): pools are destroyed before the I/O thread is unregistered; confirm the thread cannot still be using them */ | |
12227 | + if (bbr_io_thread) { | |
12228 | + evms_cs_unregister_thread(bbr_io_thread); | |
12229 | + bbr_io_thread = NULL; | |
12230 | + } | |
12231 | + } | |
12232 | + MOD_DEC_USE_COUNT; /* balances the per-node MOD_INC_USE_COUNT taken in bbr_discover() */ | |
12233 | + } | |
12234 | + return rc; | |
12235 | +} | |
12236 | + | |
12237 | +static struct bbr_io_buffer * allocate_bbr_io_buf(struct bbr_private * bbr_id, | |
12238 | + struct buffer_head * bh, | |
12239 | + int rw) | |
12240 | +{ /* Get a zeroed I/O anchor from bbr_io_buf_pool and record bbr_id, bh and rw in it. Returns NULL (after logging) if the pool allocation fails. */ | |
12241 | + struct bbr_io_buffer * io_buf = mempool_alloc(bbr_io_buf_pool, GFP_NOIO); | |
12242 | + | |
12243 | + if (!io_buf) { | |
12244 | + LOG_WARNING("Could not allocate from BBR I/O buffer pool!\n"); | |
12245 | + return NULL; | |
12246 | + } | |
12247 | + /* Start from an all-zero anchor, then fill in the fields known at allocation time. */ | |
12248 | + memset(io_buf, 0, sizeof(*io_buf)); | |
12249 | + INIT_LIST_HEAD(&io_buf->bbr_io_list); | |
12250 | + io_buf->bbr_id = bbr_id; | |
12251 | + io_buf->bh = bh; | |
12252 | + io_buf->rw = rw; | |
12253 | + return io_buf; | |
12254 | +} | |
12255 | + | |
12256 | +static void free_bbr_io_buf(struct bbr_io_buffer * bbr_io_buf) | |
12257 | +{ /* Return an I/O anchor obtained from allocate_bbr_io_buf() to bbr_io_buf_pool. */ | |
12258 | + mempool_free(bbr_io_buf, bbr_io_buf_pool); | |
12259 | +} | |
12260 | + | |
12261 | +/** | |
12262 | + * bbr_io_remap_error | |
12263 | + * @bbr_id: Private data for the BBR node. | |
12264 | + * @rw: READ or WRITE. | |
12265 | + * @starting_lsn: Starting sector of request to remap. | |
12266 | + * @count: Number of sectors in the request. | |
12267 | + * @buffer: Data buffer for the request. | |
12268 | + * | |
12269 | + * For the requested range, try to write each sector individually. For each | |
12270 | + * sector that fails, find the next available remap location and write the | |
12271 | + * data to that new location. Then update the table and write both copies | |
12272 | + * of the table to disk. Finally, update the in-memory mapping and do any | |
12273 | + * other necessary bookkeeping. Returns 0 on success; -EIO for reads, when remapping is stopped, or when no replacement sectors are left; otherwise the error from the table write or the tree insert. | |
12274 | + */ | |
12275 | +static int bbr_io_remap_error(struct bbr_private * bbr_id, | |
12276 | + int rw, | |
12277 | + u64 starting_lsn, | |
12278 | + u64 count, | |
12279 | + char * buffer ) | |
12280 | +{ | |
12281 | + struct evms_bbr_table * bbr_table; | |
12282 | + unsigned long table_sector_index; | |
12283 | + unsigned long table_sector_offset; | |
12284 | + unsigned long index; | |
12285 | + u64 lsn, new_lsn; | |
12286 | + int rc; | |
12287 | + | |
12288 | + if ( rw == READ ) { | |
12289 | + /* Nothing can be done about read errors. */ | |
12290 | + return -EIO; | |
12291 | + } | |
12292 | + | |
12293 | + /* For each sector in the request: lsn is the offset within the request, and buffer advances one sector per step. */ | |
12294 | + for ( lsn = 0; lsn < count; lsn++, buffer += EVMS_VSECTOR_SIZE ) { | |
12295 | + rc = INIT_IO(bbr_id->source, rw, starting_lsn + lsn, 1, buffer); | |
12296 | + while (rc) { | |
12297 | + if ( bbr_id->flag & BBR_STOP_REMAP ) { | |
12298 | + /* Can't allow new remaps if the | |
12299 | + * engine told us to stop. | |
12300 | + */ | |
12301 | + LOG_ERROR("Object %s: Bad sector ("PFU64"), but remapping is turned off.\n", | |
12302 | + bbr_id->node->name, starting_lsn+lsn); | |
12303 | + return -EIO; | |
12304 | + } | |
12305 | + | |
12306 | + /* Find the next available relocation sector: the in-use counter doubles as the index of the next free replacement sector. */ | |
12307 | + new_lsn = atomic_read(&bbr_id->in_use_replacement_blks); | |
12308 | + if ( new_lsn >= bbr_id->nr_replacement_blks ) { | |
12309 | + /* No more replacement sectors available. */ | |
12310 | + return -EIO; | |
12311 | + } | |
12312 | + new_lsn += bbr_id->start_replacement_sect; | |
12313 | + | |
12314 | + /* Write the data to its new location. */ | |
12315 | + LOG_WARNING("Object %s: Trying to remap bad sector ("PFU64") to sector ("PFU64")\n", | |
12316 | + bbr_id->node->name, starting_lsn + lsn, | |
12317 | + new_lsn); | |
12318 | + rc = INIT_IO(bbr_id->source, rw, new_lsn, 1, buffer); | |
12319 | + if (rc) { | |
12320 | + /* This replacement sector is bad. | |
12321 | + * Try the next one. The counter is bumped so the bad replacement sector is skipped on the retry. | |
12322 | + */ | |
12323 | + LOG_ERROR("Object %s: Replacement sector ("PFU64") is bad. Skipping.\n", | |
12324 | + bbr_id->node->name, new_lsn); | |
12325 | + atomic_inc(&bbr_id->in_use_replacement_blks); | |
12326 | + continue; | |
12327 | + } | |
12328 | + | |
12329 | + /* Add this new entry to the on-disk table. The replacement sector's index selects both the table sector and the slot inside it. */ | |
12330 | + table_sector_index = new_lsn - | |
12331 | + bbr_id->start_replacement_sect; | |
12332 | + table_sector_offset = table_sector_index / | |
12333 | + EVMS_BBR_ENTRIES_PER_SECT; | |
12334 | + index = table_sector_index % EVMS_BBR_ENTRIES_PER_SECT; | |
12335 | + | |
12336 | + bbr_table = &bbr_id->bbr_table[table_sector_offset]; | |
12337 | + bbr_table->entries[index].bad_sect = starting_lsn + lsn; | |
12338 | + bbr_table->entries[index].replacement_sect = new_lsn; | |
12339 | + bbr_table->in_use_cnt++; | |
12340 | + bbr_table->sequence_number++; | |
12341 | + bbr_table->crc = 0; /* the CRC is computed with the crc field itself zeroed */ | |
12342 | + bbr_table->crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, | |
12343 | + bbr_table, | |
12344 | + sizeof(struct evms_bbr_table)); | |
12345 | + | |
12346 | + /* Write the table to disk. The sector is converted to little-endian in place for the writes and converted back afterwards. */ | |
12347 | + cpu_bbr_table_sector_to_le(bbr_table, bbr_table); | |
12348 | + if ( bbr_id->lba_table1 ) { | |
12349 | + rc = INIT_IO(bbr_id->source, WRITE, | |
12350 | + bbr_id->lba_table1 + | |
12351 | + table_sector_offset, | |
12352 | + 1, bbr_table); | |
12353 | + } | |
12354 | + if ( bbr_id->lba_table2 ) { | |
12355 | + rc |= INIT_IO(bbr_id->source, WRITE, /* NOTE(review): OR-ing two results only preserves zero/non-zero, not a specific error code */ | |
12356 | + bbr_id->lba_table2 + | |
12357 | + table_sector_offset, | |
12358 | + 1, bbr_table); | |
12359 | + } | |
12360 | + le_bbr_table_sector_to_cpu(bbr_table); | |
12361 | + | |
12362 | + if (rc) { | |
12363 | + /* Error writing one of the tables to disk. NOTE(review): the in-memory table sector already holds the new entry and bumped counters at this point. */ | |
12364 | + LOG_ERROR("Object %s: Error updating BBR tables on disk.\n", | |
12365 | + bbr_id->node->name); | |
12366 | + return rc; | |
12367 | + } | |
12368 | + | |
12369 | + /* Insert a new entry in the remapping binary-tree. */ | |
12370 | + rc = bbr_insert_remap_entry(bbr_id, | |
12371 | + &bbr_table->entries[index]); | |
12372 | + if (rc) { | |
12373 | + LOG_ERROR("Object %s: Error adding new entry to remap tree.\n", | |
12374 | + bbr_id->node->name); | |
12375 | + return rc; | |
12376 | + } | |
12377 | + | |
12378 | + atomic_inc(&bbr_id->in_use_replacement_blks); /* rc is 0 here, so the while loop ends and the next sector is tried */ | |
12379 | + } | |
12380 | + } | |
12381 | + | |
12382 | + return 0; | |
12383 | +} | |
12384 | + | |
12385 | +/** | |
12386 | + * bbr_io_process_request | |
12387 | + * | |
12388 | + * For each sector in this request, check if the sector has already | |
12389 | + * been remapped. If so, process all previous sectors in the request, | |
12390 | + * followed by the remapped sector. Then reset the starting lsn and | |
12391 | + * count, and keep going with the rest of the request as if it were | |
12392 | + * a whole new request. If any of the INIT_IO's return an error, | |
12393 | + * call the remapper to relocate the bad sector(s). Returns 0 on success, otherwise the first unrecovered error. | |
12394 | + */ | |
12395 | +static int bbr_io_process_request(struct bbr_io_buffer * bbr_io_buf) | |
12396 | +{ | |
12397 | + struct bbr_private * bbr_id = bbr_io_buf->bbr_id; | |
12398 | + u64 starting_lsn = bbr_io_buf->bh->b_rsector; | |
12399 | + u64 count = bbr_io_buf->bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
12400 | + u64 lsn, remapped_lsn; | |
12401 | + char * buffer = bbr_io_buf->bh->b_data; | |
12402 | + int rc = 0, rw = bbr_io_buf->rw; | |
12403 | + | |
12404 | + /* For each sector in this request, check if this sector has already | |
12405 | + * been remapped. If so, process all previous sectors in this request, | |
12406 | + * followed by the remapped sector. Then reset the starting lsn and | |
12407 | + * count and keep going with the rest of the request as if it were | |
12408 | + * a whole new request. The scan stops early if BBR_STOP_REMAP gets set. | |
12409 | + */ | |
12410 | + for ( lsn = 0; lsn < count && !(bbr_id->flag & BBR_STOP_REMAP); lsn++ ) { | |
12411 | + remapped_lsn = starting_lsn + lsn; | |
12412 | + rc = bbr_remap(bbr_id, &remapped_lsn); | |
12413 | + if (!rc) { | |
12414 | + /* This sector is fine. */ | |
12415 | + continue; | |
12416 | + } | |
12417 | + | |
12418 | + /* Process all sectors in the request up to this one. */ | |
12419 | + if ( lsn > 0 ) { | |
12420 | + rc = INIT_IO(bbr_id->source, rw, | |
12421 | + starting_lsn, lsn, buffer); | |
12422 | + if (rc) { | |
12423 | + /* If this I/O failed, then one of the sectors | |
12424 | + * in this request needs to be relocated. | |
12425 | + */ | |
12426 | + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn, | |
12427 | + lsn, buffer); | |
12428 | + if (rc) { | |
12429 | + return rc; | |
12430 | + } | |
12431 | + } | |
12432 | + buffer += (lsn << EVMS_VSECTOR_SIZE_SHIFT); | |
12433 | + } | |
12434 | + | |
12435 | + /* Process the remapped sector. */ | |
12436 | + rc = INIT_IO(bbr_id->source, rw, remapped_lsn, 1, buffer); | |
12437 | + if (rc) { | |
12438 | + /* BUGBUG - Need more processing if this caused | |
12439 | + * an error. If this I/O failed, then the existing | |
12440 | + * remap is now bad, and we need to find a new remap. | |
12441 | + * Can't use bbr_io_remap_error(), because the existing | |
12442 | + * map entry needs to be changed, not added again, and | |
12443 | + * the original table entry also needs to be changed. | |
12444 | + */ | |
12445 | + return rc; | |
12446 | + } | |
12447 | + | |
12448 | + buffer += EVMS_VSECTOR_SIZE; | |
12449 | + starting_lsn += (lsn + 1); | |
12450 | + count -= (lsn + 1); | |
12451 | + lsn = -1; /* lsn is u64: -1 wraps to the maximum value and the loop's lsn++ brings it back to 0 */ | |
12452 | + } | |
12453 | + | |
12454 | + /* Check for any remaining sectors after the last split. This could | |
12455 | + * potentially be the whole request, but that should be a rare case | |
12456 | + * because requests should only be processed by the thread if we know | |
12457 | + * an error occurred or they contained one or more remapped sectors. | |
12458 | + */ | |
12459 | + if ( count ) { | |
12460 | + rc = INIT_IO(bbr_id->source, rw, starting_lsn, count, buffer); | |
12461 | + if (rc) { | |
12462 | + /* If this I/O failed, then one of the sectors in this | |
12463 | + * request needs to be relocated. | |
12464 | + */ | |
12465 | + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn, | |
12466 | + count, buffer); | |
12467 | + if (rc) { | |
12468 | + return rc; | |
12469 | + } | |
12470 | + } | |
12471 | + } | |
12472 | + | |
12473 | + return 0; | |
12474 | +} | |
12475 | + | |
12476 | +/** | |
12477 | + * bbr_io_handler | |
12478 | + * | |
12479 | + * This is the handler for the bbr_io_thread. It continuously loops, | |
12480 | + * taking I/O requests off its list and processing them. If nothing | |
12481 | + * is on the list, the handler returns, so the thread goes back to sleep until specifically | |
12482 | + * woken up. | |
12483 | + * | |
12484 | + * I/O requests should only be sent to this thread if we know that: | |
12485 | + * a) the request contains at least one remapped sector. | |
12486 | + * or | |
12487 | + * b) the request caused an error on the normal I/O path. | |
12488 | + * This function uses synchronous I/O, so sending a request to this | |
12489 | + * thread that doesn't need special processing will cause severe | |
12490 | + * performance degradation. The void_data argument is not used. | |
12491 | + */ | |
12492 | +static void bbr_io_handler(void * void_data) | |
12493 | +{ | |
12494 | + struct bbr_io_buffer * bbr_io_buf; | |
12495 | + struct buffer_head * bh; | |
12496 | + unsigned long flags; | |
12497 | + int rc = 0; | |
12498 | + | |
12499 | + while (1) { | |
12500 | + /* Process bbr_io_list, one entry at a time. The lock is held only while dequeuing, not while the request is processed. */ | |
12501 | + spin_lock_irqsave(&bbr_io_list_lock, flags); | |
12502 | + if (list_empty(&bbr_io_list)) { | |
12503 | + /* No more items on the list. */ | |
12504 | + spin_unlock_irqrestore(&bbr_io_list_lock, flags); | |
12505 | + break; | |
12506 | + } | |
12507 | + bbr_io_buf = list_entry(bbr_io_list.next, | |
12508 | + struct bbr_io_buffer, bbr_io_list); | |
12509 | + list_del(&bbr_io_buf->bbr_io_list); | |
12510 | + spin_unlock_irqrestore(&bbr_io_list_lock, flags); | |
12511 | + | |
12512 | + rc = bbr_io_process_request(bbr_io_buf); | |
12513 | + | |
12514 | + /* Clean up and complete the original I/O. bh is saved first because the anchor is freed before b_end_io is called. */ | |
12515 | + bh = bbr_io_buf->bh; | |
12516 | + if (bh->b_end_io) { | |
12517 | + free_bbr_io_buf(bbr_io_buf); | |
12518 | + evms_cs_volume_request_in_progress(bh->b_rdev, -1, NULL); | |
12519 | + bh->b_end_io(bh, rc ? 0 : 1); | |
12520 | + } else { | |
12521 | + /* A request that originated from bbr_init_io. The anchor is not freed here; the result is stored in it and the waiter is signalled. */ | |
12522 | + bbr_io_buf->rc = rc; | |
12523 | + complete(bbr_io_buf->complete); | |
12524 | + } | |
12525 | + } | |
12526 | +} | |
12527 | + | |
12528 | +/** | |
12529 | + * bbr_schedule_io | |
12530 | + * | |
12531 | + * Place the specified bbr_io_buf on the tail of the thread's processing list (under bbr_io_list_lock), then wake the I/O thread. | |
12532 | + */ | |
12533 | +static void bbr_schedule_io(struct bbr_io_buffer * bbr_io_buf) | |
12534 | +{ | |
12535 | + unsigned long flags; | |
12536 | + | |
12537 | + spin_lock_irqsave(&bbr_io_list_lock, flags); | |
12538 | + list_add_tail(&bbr_io_buf->bbr_io_list, &bbr_io_list); | |
12539 | + spin_unlock_irqrestore(&bbr_io_list_lock, flags); | |
12540 | + evms_cs_wakeup_thread(bbr_io_thread); | |
12541 | +} | |
12542 | + | |
12543 | +/** | |
12544 | + * bbr_read | |
12545 | + * | |
12546 | + * If this request touches any remapped sectors, send it over | |
12547 | + * to the thread for processing. Otherwise send it down the stack normally. Out-of-range requests and anchor allocation failures complete bh with an error. | |
12548 | + */ | |
12549 | +static void bbr_read(struct evms_logical_node * bbr_node, | |
12550 | + struct buffer_head * bh ) | |
12551 | +{ | |
12552 | + struct bbr_private * bbr_id = bbr_node->private; | |
12553 | + struct bbr_io_buffer * bbr_io_buf; | |
12554 | + | |
12555 | + if ( bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) > | |
12556 | + bbr_node->total_vsectors ) { | |
12557 | + /* Request is off the end of the object. */ | |
12558 | + bh->b_end_io(bh, 0); | |
12559 | + return; | |
12560 | + } | |
12561 | + | |
12562 | + if ( atomic_read(&bbr_id->in_use_replacement_blks) == 0 || | |
12563 | + bbr_id->flag & BBR_STOP_REMAP || | |
12564 | + ! bbr_remap_probe(bbr_id, bh->b_rsector, | |
12565 | + bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) ) { | |
12566 | + /* No existing remaps, this request doesn't contain any | |
12567 | + * remapped sectors, or the engine told us not to remap. | |
12568 | + */ | |
12569 | + R_IO(bbr_id->source, bh); | |
12570 | + return; | |
12571 | + } | |
12572 | + | |
12573 | + /* This request has at least one remapped sector. */ | |
12574 | + bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, READ); | |
12575 | + if (!bbr_io_buf) { | |
12576 | + /* Can't get memory to track the I/O. */ | |
12577 | + bh->b_end_io(bh, 0); | |
12578 | + return; | |
12579 | + } | |
12580 | + | |
12581 | + evms_cs_volume_request_in_progress(bbr_io_buf->bh->b_rdev, +1, NULL); /* balanced by the -1 in bbr_io_handler() when the request completes */ | |
12582 | + bbr_schedule_io(bbr_io_buf); | |
12583 | +} | |
12584 | + | |
12585 | +/** | |
12586 | + * bbr_write_callback | |
12587 | + * | |
12588 | + * This is the callback for normal write requests. Check for an error | |
12589 | + * during the I/O, and send to the thread for processing if necessary. Otherwise free the anchor and complete the original request. | |
12590 | + */ | |
12591 | +static void bbr_write_callback(struct buffer_head * bh, | |
12592 | + int uptodate) | |
12593 | +{ | |
12594 | + struct bbr_io_buffer * bbr_io_buf = bh->b_private; | |
12595 | + | |
12596 | + bh->b_end_io = bbr_io_buf->org_end_io; /* restore the four fields bbr_write() saved before sending bh down */ | |
12597 | + bh->b_private = bbr_io_buf->org_private; | |
12598 | + bh->b_rsector = bbr_io_buf->org_rsector; | |
12599 | + bh->b_rdev = bbr_io_buf->org_dev; | |
12600 | + | |
12601 | + if (!(bbr_io_buf->bbr_id->flag & BBR_STOP_REMAP) && | |
12602 | + !uptodate) { | |
12603 | + LOG_ERROR("Object %s: Write failure on sector ("PFU64"). Scheduling for retry.\n", | |
12604 | + bbr_io_buf->bbr_id->node->name, (u64)bbr_io_buf->bh->b_rsector); | |
12605 | + bbr_schedule_io(bbr_io_buf); /* the anchor stays alive; bbr_io_handler() frees it and completes bh */ | |
12606 | + } else { | |
12607 | + free_bbr_io_buf(bbr_io_buf); | |
12608 | + evms_cs_volume_request_in_progress(bh->b_rdev, -1, NULL); | |
12609 | + bh->b_end_io(bh, uptodate); | |
12610 | + } | |
12611 | +} | |
12612 | + | |
12613 | +/** | |
12614 | + * bbr_write | |
12615 | + * | |
12616 | + * If this request touches any remapped sectors, send the request over | |
12617 | + * to the thread for processing. Otherwise, register for callback | |
12618 | + * notification, and send the request down normally. An I/O anchor is allocated in both cases. | |
12619 | + */ | |
12620 | +static void bbr_write(struct evms_logical_node * bbr_node, | |
12621 | + struct buffer_head * bh) | |
12622 | +{ | |
12623 | + struct bbr_private * bbr_id = bbr_node->private; | |
12624 | + struct bbr_io_buffer * bbr_io_buf; | |
12625 | + | |
12626 | + if ( bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) > | |
12627 | + bbr_node->total_vsectors || | |
12628 | + bbr_id->flag & EVMS_VOLUME_READ_ONLY ) { | |
12629 | + /* Request is off the end of the object, or this | |
12630 | + * is a read-only object. NOTE(review): the read-only bit is tested on bbr_id->flag, which elsewhere carries BBR_STOP_REMAP; confirm this is not meant to be bbr_node->flags. | |
12631 | + */ | |
12632 | + bh->b_end_io(bh, 0); | |
12633 | + return; | |
12634 | + } | |
12635 | + | |
12636 | + bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, WRITE); | |
12637 | + if (!bbr_io_buf) { | |
12638 | + /* Can't get memory to track the I/O. */ | |
12639 | + bh->b_end_io(bh, 0); | |
12640 | + return; | |
12641 | + } | |
12642 | + | |
12643 | + evms_cs_volume_request_in_progress(bh->b_rdev, +1, NULL); /* balanced by the -1 in bbr_write_callback() or bbr_io_handler() */ | |
12644 | + | |
12645 | + if ( atomic_read(&bbr_id->in_use_replacement_blks) == 0 || | |
12646 | + bbr_id->flag & BBR_STOP_REMAP || | |
12647 | + ! bbr_remap_probe(bbr_id, bh->b_rsector, | |
12648 | + bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) ) { | |
12649 | + /* No existing remaps, this request contains no remapped | |
12650 | + * sectors, or the engine said to stop remapping. Save the fields the callback restores, then hook bh so a failed write can be retried by the thread. | |
12651 | + */ | |
12652 | + bbr_io_buf->org_end_io = bh->b_end_io; | |
12653 | + bbr_io_buf->org_private = bh->b_private; | |
12654 | + bbr_io_buf->org_rsector = bh->b_rsector; | |
12655 | + bbr_io_buf->org_dev = bh->b_rdev; | |
12656 | + bh->b_end_io = bbr_write_callback; | |
12657 | + bh->b_private = bbr_io_buf; | |
12658 | + W_IO(bbr_id->source, bh); | |
12659 | + } else { | |
12660 | + /* This request contains at least one remapped sector. */ | |
12661 | + bbr_schedule_io(bbr_io_buf); | |
12662 | + } | |
12663 | +} | |
12664 | + | |
12665 | +/** | |
12666 | + * bbr_init_io_schedule_io | |
12667 | + * @bbr_id: Private data for the BBR node. | |
12668 | + * @rw: READ or WRITE. | |
12669 | + * @lsn: Starting sector for the request. | |
12670 | + * @count: Number of sectors in the request. | |
12671 | + * @buffer: Data buffer for the request. | |
12672 | + * | |
12673 | + * During init_io, failures must still be handled by the I/O thread. Create | |
12674 | + * a bbr_io_buf, and schedule it to be handled by the thread. Then wait until | |
12675 | + * the request is complete. Returns -EIO for anything but WRITE, -ENOMEM if no bbr_io_buf can be allocated, otherwise the rc recorded in the bbr_io_buf. | |
12676 | + */ | |
12677 | +static int bbr_init_io_schedule_io(struct bbr_private * bbr_id, | |
12678 | + int rw, | |
12679 | + u64 lsn, | |
12680 | + u64 count, | |
12681 | + void * buffer) | |
12682 | +{ | |
12683 | + struct bbr_io_buffer * bbr_io_buf; | |
12684 | + struct buffer_head bh; /* on-stack buffer_head describing the retry */ | |
12685 | + struct completion complete; /* waited on below until the scheduled request is done */ | |
12686 | + int rc = 0; | |
12687 | + | |
12688 | + if ( rw != WRITE ) { | |
12689 | + /* Nothing can be done about read failures. */ | |
12690 | + return -EIO; | |
12691 | + } | |
12692 | + | |
12693 | + LOG_ERROR("Object %s: init_io write failure (sector "PFU64": count "PFU64"). Scheduling for retry.\n", | |
12694 | + bbr_id->node->name, lsn, count); | |
12695 | + bbr_io_buf = allocate_bbr_io_buf(bbr_id, &bh, rw); | |
12696 | + if (!bbr_io_buf) { | |
12697 | + return -ENOMEM; | |
12698 | + } | |
12699 | + /* NOTE(review): &bh was handed to allocate_bbr_io_buf() above before being initialized here; confirm the allocator only stores the pointer. */ | |
12700 | + memset(&bh, 0, sizeof(struct buffer_head)); | |
12701 | + init_waitqueue_head(&bh.b_wait); | |
12702 | + bh.b_rsector = lsn; | |
12703 | + bh.b_size = count << EVMS_VSECTOR_SIZE_SHIFT; | |
12704 | + bh.b_data = buffer; | |
12705 | + bh.b_end_io = NULL; | |
12706 | + | |
12707 | + /* Schedule the I/O and wait for it to finish. */ | |
12708 | + bbr_io_buf->complete = &complete; | |
12709 | + init_completion(bbr_io_buf->complete); | |
12710 | + bbr_schedule_io(bbr_io_buf); | |
12711 | + wait_for_completion(bbr_io_buf->complete); | |
12712 | + /* Pick up the result before the bbr_io_buf is released. */ | |
12713 | + rc = bbr_io_buf->rc; | |
12714 | + free_bbr_io_buf(bbr_io_buf); | |
12715 | + | |
12716 | + return rc; | |
12717 | +} | |
12718 | + | |
12719 | +/** | |
12720 | + * bbr_init_io | |
12721 | + * @bbr_node: BBR node. | |
12722 | + * @rw: READ or WRITE. | |
12723 | + * @start_lsn: Starting sector for I/O request. | |
12724 | + * @count: Number of sectors in the I/O request. | |
12725 | + * @buffer: Data buffer for the I/O request. | |
12726 | + * | |
12727 | + * Synchronous I/O requests. Returns 0 on success, -EINVAL for a request past the end of the object or a write to a read-only object, otherwise the last error from INIT_IO or bbr_init_io_schedule_io(). | |
12728 | + */ | |
12729 | +static int bbr_init_io(struct evms_logical_node * bbr_node, | |
12730 | + int rw, | |
12731 | + u64 start_lsn, | |
12732 | + u64 count, | |
12733 | + void * buffer ) | |
12734 | +{ | |
12735 | + struct bbr_private * bbr_id = bbr_node->private; | |
12736 | + u64 lsn, remapped_lsn; | |
12737 | + int rc = 0; | |
12738 | + | |
12739 | + if ( start_lsn + count > bbr_node->total_vsectors ) { | |
12740 | + /* Request is off the end of the object. */ | |
12741 | + return -EINVAL; | |
12742 | + } | |
12743 | + | |
12744 | + if ( rw == WRITE && (bbr_id->flag & EVMS_VOLUME_READ_ONLY) ) { | |
12745 | + /* Can't write to a read-only object. */ | |
12746 | + return -EINVAL; | |
12747 | + } | |
12748 | + | |
12749 | + if ( bbr_id->flag & BBR_STOP_REMAP || | |
12750 | + atomic_read(&bbr_id->in_use_replacement_blks) == 0 || | |
12751 | + ! bbr_remap_probe(bbr_id, start_lsn, count) ) { | |
12752 | + /* Normal case (no existing remaps). */ | |
12753 | + rc = INIT_IO(bbr_id->source, rw, start_lsn, count, buffer); | |
12754 | + if (rc && ! (bbr_id->flag & BBR_STOP_REMAP) ) { | |
12755 | + /* Init_io error. Send request over to | |
12756 | + * thread for further processing. | |
12757 | + */ | |
12758 | + rc = bbr_init_io_schedule_io(bbr_id, rw, start_lsn, | |
12759 | + count, buffer); | |
12760 | + } | |
12761 | + } else { | |
12762 | + /* At least one sector in this request needs to be remapped. | |
12763 | + * Test and send each one down individually. Remap a copy of the sector number so bbr_remap() cannot move the loop counter. | |
12764 | + */ | |
12765 | + for ( lsn = start_lsn; lsn < start_lsn + count; | |
12766 | + lsn++, buffer += EVMS_VSECTOR_SIZE ) { | |
12767 | + remapped_lsn = lsn; | |
12768 | + bbr_remap(bbr_id, &remapped_lsn); | |
12769 | + rc = INIT_IO(bbr_id->source, rw, remapped_lsn, 1, buffer); | |
12770 | + if (rc) { | |
12771 | + /* Init_io error. Send request | |
12772 | + * to thread for processing. NOTE(review): as before, the retry is given the already-remapped sector; confirm the thread expects that. | |
12773 | + */ | |
12774 | + rc = bbr_init_io_schedule_io(bbr_id, rw, | |
12775 | + remapped_lsn, 1, buffer); | |
12776 | + if (rc) { | |
12777 | + break; | |
12778 | + } | |
12779 | + } | |
12780 | + } | |
12781 | + } | |
12782 | + | |
12783 | + return rc; | |
12784 | +} | |
12785 | + | |
12786 | +/** | |
12787 | + * bbr_direct_ioctl_sector_io | |
12788 | + * @bbr_id: Private data of the BBR object the engine is accessing. | |
12789 | + * @notify: Kernel copy of the engine's request (rw, start_sect, nr_sect, user buffer). | |
12790 | + * | |
12791 | + * Move nr_sect sectors between the user buffer and the object, one sector at a time, through a single-sector bounce buffer and bbr_init_io(). Stops at the first error. | |
12792 | + */ | |
12793 | +static int bbr_direct_ioctl_sector_io(struct bbr_private * bbr_id, | |
12794 | + struct evms_notify_bbr * notify) | |
12795 | +{ | |
12796 | + char * bounce, * uptr; | |
12797 | + u64 done = 0; | |
12798 | + int rc = 0; | |
12799 | + | |
12800 | + bounce = kmalloc(EVMS_VSECTOR_SIZE, GFP_NOIO); | |
12801 | + if (!bounce) { | |
12802 | + return -ENOMEM; | |
12803 | + } | |
12804 | + | |
12805 | + uptr = (char*)notify->buffer; | |
12806 | + while ( done < notify->nr_sect ) { | |
12807 | + /* Writes: pull the next sector in from user space. */ | |
12808 | + if ( notify->rw == WRITE && | |
12809 | + copy_from_user(bounce, uptr, EVMS_VSECTOR_SIZE) ) { | |
12810 | + rc = -EFAULT; | |
12811 | + break; | |
12812 | + } | |
12813 | + | |
12814 | + rc = bbr_init_io(bbr_id->node, notify->rw, | |
12815 | + notify->start_sect + done, 1, bounce); | |
12816 | + if (rc) { | |
12817 | + break; | |
12818 | + } | |
12819 | + | |
12820 | + /* Reads: hand the sector just read back to user space. */ | |
12821 | + if ( notify->rw == READ && | |
12822 | + copy_to_user(uptr, bounce, EVMS_VSECTOR_SIZE) ) { | |
12823 | + rc = -EFAULT; | |
12824 | + break; | |
12825 | + } | |
12826 | + | |
12827 | + done++; | |
12828 | + uptr += EVMS_VSECTOR_SIZE; | |
12829 | + } | |
12830 | + | |
12831 | + kfree(bounce); | |
12832 | + return rc; | |
12833 | +} | |
12834 | + | |
12835 | +/** | |
12836 | + * bbr_direct_ioctl | |
12837 | + * @inode: N/A | |
12838 | + * @file: N/A | |
12839 | + * @cmd: N/A | |
12840 | + * @arg: Pointer to an evms_plugin_ioctl_pkt. | |
12841 | + * | |
12842 | + * BBR-specific ioctls from the engine. The result is returned and also copied back to the caller in pkt.status. Currently handles: | |
12843 | + * BBR_STOP_REMAP_CMD | |
12844 | + * BBR_GET_INFO_CMD | |
12845 | + * BBR_SECTOR_IO_CMD | |
12846 | + */ | |
12847 | +static int bbr_direct_ioctl(struct inode * inode, | |
12848 | + struct file * file, | |
12849 | + unsigned int cmd, | |
12850 | + unsigned long arg) | |
12851 | +{ | |
12852 | + int rc = 0; | |
12853 | + struct bbr_private * bbr_id; | |
12854 | + struct evms_plugin_ioctl_pkt pkt, * user_pkt; | |
12855 | + struct evms_notify_bbr notify, * user_notify; | |
12856 | + | |
12857 | + MOD_INC_USE_COUNT; | |
12858 | + | |
12859 | + user_pkt = (struct evms_plugin_ioctl_pkt *)arg; | |
12860 | + if ( copy_from_user(&pkt, user_pkt, sizeof(pkt)) ) { | |
12861 | + MOD_DEC_USE_COUNT; | |
12862 | + return -EFAULT; | |
12863 | + } | |
12864 | + | |
12865 | + if ( pkt.feature_id != plugin_header.id ) { | |
12866 | + MOD_DEC_USE_COUNT; | |
12867 | + return -EINVAL; | |
12868 | + } | |
12869 | + | |
12870 | + user_notify = (struct evms_notify_bbr *)pkt.feature_ioctl_data; | |
12871 | + if ( copy_from_user(&notify, user_notify, sizeof(notify)) ) { | |
12872 | + rc = -EFAULT; | |
12873 | + } else { | |
12874 | + bbr_id = bbr_find_private(notify.object_name); | |
12875 | + if (!bbr_id) { | |
12876 | + rc = -ENODEV; | |
12877 | + } else { | |
12878 | + | |
12879 | + switch(pkt.feature_command) { | |
12880 | + | |
12881 | + case BBR_STOP_REMAP_CMD: | |
12882 | + bbr_id->flag |= BBR_STOP_REMAP; | |
12883 | + /* Fall through. */ | |
12884 | + | |
12885 | + case BBR_GET_INFO_CMD: | |
12886 | + notify.count = atomic_read(&bbr_id->in_use_replacement_blks); | |
12887 | + if ( copy_to_user(&user_notify->count, &notify.count, | |
12888 | + sizeof(user_notify->count))) { | |
12889 | + rc = -EFAULT; | |
12890 | + } | |
12891 | + break; | |
12892 | + | |
12893 | + case BBR_SECTOR_IO_CMD: | |
12894 | + rc = bbr_direct_ioctl_sector_io(bbr_id, &notify); | |
12895 | + break; | |
12896 | + | |
12897 | + default: | |
12898 | + rc = -ENOSYS; | |
12899 | + } | |
12900 | + } | |
12901 | + } | |
12902 | + | |
12903 | + pkt.status = rc; /* the status is reported in the packet as well as in the return value */ | |
12904 | + if ( copy_to_user(user_pkt, &pkt, sizeof(pkt)) && !rc ) { | |
12905 | + rc = -EFAULT; /* the packet could not be copied back; don't report success */ | |
12906 | + } | |
12907 | + MOD_DEC_USE_COUNT; | |
12908 | + return rc; | |
12909 | +} | |
12910 | + | |
12911 | +/** | |
12912 | + * bbr_ioctl | |
12913 | + * @bbr_node: BBR node receiving the ioctl. | |
12914 | + * @inode: Passed through unchanged. | |
12915 | + * @file: Passed through unchanged. | |
12916 | + * @cmd: ioctl command code. | |
12917 | + * @arg: Command-specific argument. | |
12918 | + * | |
12919 | + * EVMS_PLUGIN_IOCTL is serviced by bbr_direct_ioctl(). For EVMS_GET_BMAP the | |
12920 | + * packet's rsector is first run through bbr_remap(); that command and every other one is then forwarded to the child node. | |
12921 | + */ | |
12922 | +static int bbr_ioctl (struct evms_logical_node * bbr_node, | |
12923 | + struct inode * inode, | |
12924 | + struct file * file, | |
12925 | + unsigned int cmd, | |
12926 | + unsigned long arg) | |
12927 | +{ | |
12928 | + struct bbr_private * priv = bbr_node->private; | |
12929 | + struct evms_get_bmap_pkt * bmap_pkt; | |
12930 | + | |
12931 | + if ( cmd == EVMS_PLUGIN_IOCTL ) { | |
12932 | + /* Plugin-specific request from the engine. */ | |
12933 | + return bbr_direct_ioctl(inode, file, cmd, arg); | |
12934 | + } | |
12935 | + | |
12936 | + if ( cmd == EVMS_GET_BMAP ) { | |
12937 | + /* Run the sector through the remap table before | |
12938 | + * the child sees the packet. | |
12939 | + */ | |
12940 | + bmap_pkt = (struct evms_get_bmap_pkt *)arg; | |
12941 | + bbr_remap(priv, &bmap_pkt->rsector); | |
12942 | + } | |
12943 | + | |
12944 | + /* EVMS_GET_BMAP and all remaining commands go to the child. */ | |
12945 | + return IOCTL(priv->source, inode, file, cmd, arg); | |
12946 | +} | |
12947 | +/* Module entry point: registers plugin_header through evms_cs_register_plugin() and returns that call's result. */ | |
12948 | +static int __init bbr_init(void) | |
12949 | +{ | |
12950 | + return evms_cs_register_plugin(&plugin_header); | |
12951 | +} | |
12952 | +/* Module exit point: unregisters plugin_header through evms_cs_unregister_plugin(). */ | |
12953 | +static void __exit bbr_exit(void) | |
12954 | +{ | |
12955 | + evms_cs_unregister_plugin(&plugin_header); | |
12956 | +} | |
12957 | +/* Hook bbr_init/bbr_exit up as the module's init and exit handlers; the license tag is only emitted where MODULE_LICENSE is defined. */ | |
12958 | +module_init(bbr_init); | |
12959 | +module_exit(bbr_exit); | |
12960 | +#ifdef MODULE_LICENSE | |
12961 | +MODULE_LICENSE("GPL"); | |
12962 | +#endif | |
12963 | + | |
12964 | diff -Naur linux-2002-09-30/drivers/evms/evms_drivelink.c evms-2002-09-30/drivers/evms/evms_drivelink.c | |
12965 | --- linux-2002-09-30/drivers/evms/evms_drivelink.c Wed Dec 31 18:00:00 1969 | |
12966 | +++ evms-2002-09-30/drivers/evms/evms_drivelink.c Fri Sep 13 16:09:55 2002 | |
12967 | @@ -0,0 +1,1274 @@ | |
12968 | +/* -*- linux-c -*- | |
12969 | + * | |
12970 | + * | |
12971 | + * Copyright (c) International Business Machines Corp., 2000 | |
12972 | + * | |
12973 | + * This program is free software; you can redistribute it and/or modify | |
12974 | + * it under the terms of the GNU General Public License as published by | |
12975 | + * the Free Software Foundation; either version 2 of the License, or | |
12976 | + * (at your option) any later version. | |
12977 | + * | |
12978 | + * This program is distributed in the hope that it will be useful, | |
12979 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12980 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
12981 | + * the GNU General Public License for more details. | |
12982 | + * | |
12983 | + * You should have received a copy of the GNU General Public License | |
12984 | + * along with this program; if not, write to the Free Software | |
12985 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
12986 | + * | |
12987 | + * | |
12988 | + */ | |
12989 | +/* | |
12990 | + * linux/drivers/evms/drvlink.c | |
12991 | + | |
12992 | + * | |
12993 | + * EVMS Drive Linking Feature. | |
12994 | + * | |
12995 | + * This feature provides the ability to link multiple storage objects | |
12996 | + * together as a single virtual storage object. | |
12997 | + * | |
12998 | + */ | |
12999 | + | |
13000 | +#include <linux/module.h> | |
13001 | +#include <linux/kernel.h> | |
13002 | +#include <linux/config.h> | |
13003 | +#include <linux/genhd.h> | |
13004 | +#include <linux/blk.h> | |
13005 | +#include <linux/evms/evms.h> | |
13006 | +#include <linux/evms/evms_drivelink.h> | |
13007 | +#include <asm/uaccess.h> | |
13008 | + | |
13009 | +#define LOG_PREFIX "drivelink: " | |
13010 | + | |
13011 | +/* prototypes for mandatory plugin interface functions (installed in the fops table below) */ | |
13012 | +static int drivelink_discover(struct evms_logical_node **); | |
13013 | +static int drivelink_delete(struct evms_logical_node *); | |
13014 | +static void drivelink_read(struct evms_logical_node *, struct buffer_head *); | |
13015 | +static void drivelink_write(struct evms_logical_node *, struct buffer_head *); | |
13016 | +static int drivelink_ioctl(struct evms_logical_node *, | |
13017 | + struct inode *, | |
13018 | + struct file *, unsigned int, unsigned long); | |
13019 | +static int drivelink_init_io(struct evms_logical_node *, | |
13020 | + int, u64, u64, void *); | |
13021 | + | |
13022 | +/* plugin function table definition: one entry per interface function declared above; referenced from plugin_header.fops */ | |
13023 | +static struct evms_plugin_fops fops = { | |
13024 | + .discover = drivelink_discover, | |
13025 | + .delete = drivelink_delete, | |
13026 | + .read = drivelink_read, | |
13027 | + .write = drivelink_write, | |
13028 | + .init_io = drivelink_init_io, | |
13029 | + .ioctl = drivelink_ioctl | |
13030 | +}; | |
13031 | + | |
13032 | +/* plugin header definition: identifies the drivelink feature plugin and points at its function table */ | |
13033 | +static struct evms_plugin_header plugin_header = { | |
13034 | + .id = SetPluginID(IBM_OEM_ID, | |
13035 | + EVMS_FEATURE, | |
13036 | + EVMS_DRIVELINK_FEATURE_ID), | |
13037 | + .version = { /* version of this plugin: 2.0.1 */ | |
13038 | + .major = 2, | |
13039 | + .minor = 0, | |
13040 | + .patchlevel = 1 | |
13041 | + }, | |
13042 | + .required_services_version = { /* presumably the minimum EVMS common services level this plugin needs: 0.5.0 */ | |
13043 | + .major = 0, | |
13044 | + .minor = 5, | |
13045 | + .patchlevel = 0 | |
13046 | + }, | |
13047 | + .fops = &fops | |
13048 | +}; | |
13049 | + | |
13050 | +/********************************************************/ | |
13051 | +/* Required Plugin Function Table Entry Point: */ | |
13052 | +/* Discover function & Support routines */ | |
13053 | +/********************************************************/ | |
13054 | + | |
13055 | +/** | |
13056 | + * le_feature_data_to_cpu: | |
13057 | + * @md: drivelink metadata, converted in place | |
13058 | + * | |
13059 | + * converts every multi-byte field of @md, including all EVMS_DRIVELINK_MAX_ENTRIES | |
13060 | + * ordering table entries, from on-disk (Little Endian) format to the native cpu endian format. | |
13061 | +**/ | |
13062 | +static void | |
13063 | +le_feature_data_to_cpu(struct evms_drivelink_metadata *md) | |
13064 | +{ | |
13065 | + struct evms_dl_ordering_table_entry *entry = md->ordering_table; | |
13066 | + struct evms_dl_ordering_table_entry *end = entry + EVMS_DRIVELINK_MAX_ENTRIES; | |
13067 | + | |
13068 | + /* 32-bit header fields */ | |
13069 | + md->signature = le32_to_cpup(&md->signature); | |
13070 | + md->crc = le32_to_cpup(&md->crc); | |
13071 | + md->version.major = le32_to_cpup(&md->version.major); | |
13072 | + md->version.minor = le32_to_cpup(&md->version.minor); | |
13073 | + md->version.patchlevel = le32_to_cpup(&md->version.patchlevel); | |
13074 | + md->flags = le32_to_cpup(&md->flags); | |
13075 | + | |
13076 | + /* 64-bit header fields */ | |
13077 | + md->sequence_number = le64_to_cpup(&md->sequence_number); | |
13078 | + md->child_serial_number = le64_to_cpup(&md->child_serial_number); | |
13079 | + md->parent_serial_number = le64_to_cpup(&md->parent_serial_number); | |
13080 | + md->child_count = le64_to_cpup(&md->child_count); | |
13081 | + /* every slot of the ordering table, whether in use or not */ | |
13082 | + for (; entry < end; entry++) { | |
13083 | + entry->child_serial_number = le64_to_cpup(&entry->child_serial_number); | |
13084 | + entry->child_vsize = le64_to_cpup(&entry->child_vsize); | |
13085 | + } | |
13086 | +} | |
13087 | + | |
13088 | +/** | |
13089 | + * load_feature_data: load the drivelink feature data (metadata) from disk | |
13090 | + * @node: storage object | |
13091 | + * @md: output, ptr to the selected copy of the drivelink metadata | |
13092 | + * | |
13093 | + * loads and verifies both redundant copies of the drivelink metadata (size, signature, CRC, version) and selects the valid copy with the highest sequence number. | |
13094 | + * on success *@md points to a kmalloc'ed buffer, already converted to cpu endian format, that the caller must kfree. | |
13095 | + * | |
13096 | + * Return value: 0 on success | |
13097 | + * Otherwise error code | |
13098 | +**/ | |
13099 | +static int | |
13100 | +load_feature_data(struct evms_logical_node *node, | |
13101 | + struct evms_drivelink_metadata **md) | |
13102 | +{ | |
13103 | + int i, rc = 0, rc_array[2] = { 0, 0 }, size_in_bytes; | |
13104 | + u64 real_metadata_size, feature_data_size; | |
13105 | + u64 starting_sector; | |
13106 | + struct evms_drivelink_metadata *cur_md, *md1, *md2 = NULL; | |
13107 | + char *location_name; | |
13108 | + | |
13109 | + /* verify the feature metadata size from the */ | |
13110 | + /* feature header agrees with the real size */ | |
13111 | + /* of the current metadata structure. */ | |
13112 | + real_metadata_size = evms_cs_size_in_vsectors(sizeof (**md)); | |
13113 | + | |
13114 | + /* allocate a buffer large enough to hold all */ | |
13115 | + /* sectors containing the feature's metadata */ | |
13116 | + size_in_bytes = real_metadata_size * EVMS_VSECTOR_SIZE; | |
13117 | + md1 = kmalloc(size_in_bytes, GFP_KERNEL); | |
13118 | + if (md1) { | |
13119 | + md2 = kmalloc(size_in_bytes, GFP_KERNEL); | |
13120 | + if (!md2) { | |
13121 | + kfree(md1); | |
13122 | + rc = -ENOMEM; | |
13123 | + } | |
13124 | + } else { | |
13125 | + rc = -ENOMEM; | |
13126 | + } | |
13127 | + if (!rc) { | |
13128 | + for (i = 0; i < 2; i++) { | |
13129 | + if (i == 0) { | |
13130 | + starting_sector = | |
13131 | + node->feature_header-> | |
13132 | + feature_data1_start_lsn; | |
13133 | + feature_data_size = | |
13134 | + node->feature_header->feature_data1_size; | |
13135 | + cur_md = md1; | |
13136 | + location_name = evms_primary_string; | |
13137 | + } else { | |
13138 | + starting_sector = | |
13139 | + node->feature_header-> | |
13140 | + feature_data2_start_lsn; | |
13141 | + feature_data_size = | |
13142 | + node->feature_header->feature_data2_size; | |
13143 | + cur_md = md2; | |
13144 | + location_name = evms_secondary_string; | |
13145 | + } | |
13146 | + /* check that real metadata size matches the */ | |
13147 | + /* feature data size */ | |
13148 | + if (real_metadata_size != feature_data_size) { | |
13149 | + LOG_ERROR | |
13150 | + ("%s feature data size("PFU64" bytes) doesn't match expected size("PFU64" bytes).\n", | |
13151 | + location_name, | |
13152 | + feature_data_size << | |
13153 | + EVMS_VSECTOR_SIZE_SHIFT, | |
13154 | + real_metadata_size << | |
13155 | + EVMS_VSECTOR_SIZE_SHIFT); | |
13156 | + rc = -EINVAL; | |
13157 | + rc_array[i] = rc; | |
13158 | + continue; | |
13159 | + } | |
13160 | + /* load the node's feature data */ | |
13161 | + rc = INIT_IO(node, | |
13162 | + 0, /* rw = 0, i.e. a read (assumes READ == 0) */ | |
13163 | + starting_sector, | |
13164 | + feature_data_size, cur_md); | |
13165 | + if (rc) { | |
13166 | + LOG_ERROR | |
13167 | + ("error(%d) probing for %s feature data at sector("PFU64") on '%s'.\n", | |
13168 | + rc, location_name, starting_sector, | |
13169 | + node->name); | |
13170 | + rc_array[i] = rc; | |
13171 | + continue; | |
13172 | + } | |
13173 | + /* check for valid metadata signature */ | |
13174 | + if (le32_to_cpup(&cur_md->signature) != | |
13175 | + EVMS_DRIVELINK_SIGNATURE) { | |
13176 | + rc = -ENODATA; | |
13177 | + LOG_SERIOUS | |
13178 | + ("error(%d) invalid signature in %s feature data on '%s'\n", | |
13179 | + rc, location_name, node->name); | |
13180 | + rc_array[i] = rc; | |
13181 | + continue; | |
13182 | + } | |
13183 | + /* validate feature data CRC. the field is still in on-disk (Little Endian) format here, so convert it before comparing with the magic value */ | |
13184 | + if (le32_to_cpup(&cur_md->crc) != EVMS_MAGIC_CRC) { | |
13185 | + int org_crc, final_crc; | |
13186 | + org_crc = le32_to_cpup(&cur_md->crc); | |
13187 | + cur_md->crc = 0; | |
13188 | + final_crc = | |
13189 | + evms_cs_calculate_crc(EVMS_INITIAL_CRC, | |
13190 | + cur_md, | |
13191 | + sizeof (*cur_md)); | |
13192 | + if (final_crc != org_crc) { | |
13193 | + LOG_ERROR | |
13194 | + ("CRC mismatch error [stored(%x), computed(%x)] in %s feature data on '%s'.\n", | |
13195 | + org_crc, final_crc, location_name, | |
13196 | + node->name); | |
13197 | + rc = -EINVAL; | |
13198 | + rc_array[i] = rc; | |
13199 | + continue; | |
13200 | + } | |
13201 | + } else { | |
13202 | + LOG_WARNING | |
13203 | + ("CRC disabled in %s feature data on '%s'.\n", | |
13204 | + location_name, node->name); | |
13205 | + } | |
13206 | + /* convert feature data from on-disk | |
13207 | + * format (Little Endian) to native | |
13208 | + * cpu endian format. | |
13209 | + */ | |
13210 | + le_feature_data_to_cpu(cur_md); | |
13211 | + /* check for valid structure version */ | |
13212 | + rc = evms_cs_check_version(&metadata_ver, | |
13213 | + &cur_md->version); | |
13214 | + if (rc) { | |
13215 | + LOG_SERIOUS | |
13216 | + ("error(%d) obsolete version detected: actual(%d,%d,%d), requires(%d,%d,%d) in %s feature data on '%s'\n", | |
13217 | + rc, cur_md->version.major, | |
13218 | + cur_md->version.minor, | |
13219 | + cur_md->version.patchlevel, | |
13220 | + DRIVELINK_METADATA_MAJOR, | |
13221 | + DRIVELINK_METADATA_MINOR, | |
13222 | + DRIVELINK_METADATA_PATCHLEVEL, | |
13223 | + location_name, node->name); | |
13224 | + rc_array[i] = rc; | |
13225 | + } | |
13226 | + } | |
13227 | + /* getting same return code for both copies? */ | |
13228 | + if (rc_array[0] == rc_array[1]) { | |
13229 | + rc = rc_array[0]; | |
13230 | + /* if no errors on both copies, | |
13231 | + * check the sequence numbers. | |
13232 | + * use the highest sequence number. | |
13233 | + */ | |
13234 | + if (!rc) { | |
13235 | + /* compare sequence numbers */ | |
13236 | + if (md1->sequence_number == | |
13237 | + md2->sequence_number) { | |
13238 | + cur_md = md1; | |
13239 | + } else { | |
13240 | + LOG_WARNING | |
13241 | + ("sequence number mismatches between front("PFU64") and rear("PFU64") feature data copies on node(%s)!\n", | |
13242 | + md2->sequence_number, | |
13243 | + md1->sequence_number, node->name); | |
13244 | + if (md1->sequence_number > | |
13245 | + md2->sequence_number) | |
13246 | + cur_md = md1; | |
13247 | + else | |
13248 | + cur_md = md2; | |
13249 | + LOG_WARNING | |
13250 | + ("using %s feature data copy!\n", | |
13251 | + (cur_md == | |
13252 | + md1) ? evms_primary_string : | |
13253 | + evms_secondary_string); | |
13254 | + } | |
13255 | + } | |
13256 | + /* getting different return codes for each copy */ | |
13257 | + } else if (rc_array[0] == 0) { | |
13258 | + /* use 1st (rear) copy if its good */ | |
13259 | + rc = 0; | |
13260 | + cur_md = md1; | |
13261 | + } else if (rc_array[1] == 0) { | |
13262 | + /* use 2nd (front) copy if its good */ | |
13263 | + rc = 0; | |
13264 | + cur_md = md2; | |
13265 | + } else if ((rc_array[0] == -EINVAL) || (rc_array[1] == -EINVAL)) { | |
13266 | + /* fail if either give a fatal error */ | |
13267 | + rc = -EINVAL; | |
13268 | + cur_md = NULL; | |
13269 | + } | |
13270 | + | |
13271 | + /* deallocate metadata buffers appropriately: on error both are freed, on success only the copy that was not selected */ | |
13272 | + if (rc || (cur_md == md1)) | |
13273 | + kfree(md2); | |
13274 | + if (rc || (cur_md == md2)) | |
13275 | + kfree(md1); | |
13276 | + | |
13277 | + /* hand the validated metadata copy to the caller */ | |
13278 | + if (!rc) | |
13279 | + *md = cur_md; | |
13280 | + } | |
13281 | + return (rc); | |
13282 | +} | |
13283 | + | |
13284 | +/** | |
13285 | + * find_parent_node_for_child_node: finds or creates a parent node for this child node | |
13286 | + * @child_node: input, child node | |
13287 | + * @md: input, on-disk metadata | |
13288 | + * @parent_node: output, parent node (NULL on error) | |
13289 | + * @dl_private: output, runtime metadata (NULL on error) | |
13290 | + * @discover_list: input/output, list of objects being discovered | |
13291 | + * | |
13292 | + * finds or creates a parent node for the specified child node. if the parent node is | |
13293 | + * created, create and initialize the parent's private data area. a child_count larger than EVMS_DRIVELINK_MAX_ENTRIES is rejected with -EINVAL before anything is allocated. | |
13294 | + * | |
13295 | + * Return value: 0 on success | |
13296 | + * Otherwise error code. | |
13297 | +**/ | |
13298 | +static int | |
13299 | +find_parent_node_for_child_node(struct evms_logical_node *child_node, | |
13300 | + struct evms_drivelink_metadata *md, | |
13301 | + struct evms_logical_node **parent_node, | |
13302 | + struct runtime_data **dl_private, | |
13303 | + struct evms_logical_node **discover_list) | |
13304 | +{ | |
13305 | + int rc = 0, parent_found = FALSE; | |
13306 | + struct evms_logical_node *parent = NULL; | |
13307 | + struct runtime_data *rd = NULL; | |
13308 | + | |
13309 | + /* find the parent node for this child */ | |
13310 | + for (parent = *discover_list; parent; parent = parent->next) { | |
13311 | + /* only parent nodes will have null feature headers */ | |
13312 | + if (!parent->feature_header) { | |
13313 | + rd = (struct runtime_data *) parent->private; | |
13314 | + if (rd->parent_sn == md->parent_serial_number) { | |
13315 | + parent_found = TRUE; | |
13316 | + break; | |
13317 | + } | |
13318 | + } | |
13319 | + } | |
13320 | + /* if no parent node found, create it */ | |
13321 | + if (parent_found == FALSE) { | |
13322 | + /* the on-disk ordering table only has room for EVMS_DRIVELINK_MAX_ENTRIES children; a larger count means corrupt metadata */ | |
13323 | + if (md->child_count > EVMS_DRIVELINK_MAX_ENTRIES) | |
13324 | + rc = -EINVAL; | |
13325 | + else | |
13326 | + rc = evms_cs_allocate_logical_node(&parent); | |
13327 | + if (!rc) { | |
13328 | + /* transpose info from child to parent */ | |
13329 | + parent->flags |= child_node->flags; | |
13330 | + strcpy(parent->name, child_node->feature_header->object_name); | |
13331 | + /* copy evms system data to parent */ | |
13332 | + parent->volume_info = child_node->volume_info; | |
13333 | + /* initialize the plugin id field */ | |
13334 | + parent->plugin = &plugin_header; | |
13335 | + /* allocate parent's instance data */ | |
13336 | + parent->private = kmalloc(sizeof(*rd), GFP_KERNEL); | |
13337 | + if (!parent->private) | |
13338 | + rc = -ENOMEM; | |
13339 | + } | |
13340 | + if (!rc) { | |
13341 | + /* initialize some instance data fields */ | |
13342 | + rd = (struct runtime_data *) parent->private; | |
13343 | + rd->block_size = 0; | |
13344 | + rd->parent_sn = md->parent_serial_number; | |
13345 | + rd->child_count = md->child_count; | |
13346 | + /* allocate the child table */ | |
13347 | + rd->child_table = kmalloc(sizeof(struct runtime_entry) * rd->child_count, GFP_KERNEL); | |
13348 | + if (!rd->child_table) | |
13349 | + rc = -ENOMEM; | |
13350 | + } | |
13351 | + if (!rc) { | |
13352 | + memset(rd->child_table, 0, sizeof(struct runtime_entry) * rd->child_count); | |
13353 | + /* add the parent node to the discover list */ | |
13354 | + rc = evms_cs_add_logical_node_to_list(discover_list, parent); | |
13355 | + MOD_INC_USE_COUNT; | |
13356 | + } | |
13357 | + /* if any errors encountered, try to clean up */ | |
13358 | + if (rc) { | |
13359 | + LOG_SERIOUS("find_parent_node: rc(%d) from '%s'\n", | |
13360 | + rc, child_node->name); | |
13361 | + if (parent) { | |
13362 | + DELETE(parent); | |
13363 | + parent = NULL; | |
13364 | + } | |
13365 | + rd = NULL; /* never hand back private data left over from the search loop */ | |
13366 | + } | |
13367 | + } | |
13368 | + | |
13369 | + *dl_private = rd; | |
13370 | + *parent_node = parent; | |
13371 | + | |
13372 | + return (rc); | |
13373 | +} | |
13374 | + | |
13375 | +/** | |
13376 | + * compute_child_index: compute the index for a specific child node | |
13377 | + * @node: the child node (only used for the error message) | |
13378 | + * @md: the drivelink on-disk metadata | |
13379 | + * | |
13380 | + * compute and return a 0-based index value of this child node's position | |
13381 | + * in the parent node's ordering table, i.e. the first entry whose serial number equals @md->child_serial_number. | |
13382 | + * | |
13383 | + * Return value: -1 on error | |
13384 | + * otherwise the index of the specified child. | |
13385 | +**/ | |
13386 | +static int | |
13387 | +compute_child_index(struct evms_logical_node *node, | |
13388 | + struct evms_drivelink_metadata *md) | |
13389 | +{ | |
13390 | + int i, position = -1; | |
13391 | + /* never scan past the end of the fixed-size ordering table, even if the on-disk child_count is corrupt */ | |
13392 | + for (i = 0; (i < EVMS_DRIVELINK_MAX_ENTRIES) && (i < md->child_count); i++) { | |
13393 | + if (md->ordering_table[i].child_serial_number == | |
13394 | + md->child_serial_number) { | |
13395 | + position = i; | |
13396 | + break; | |
13397 | + } | |
13398 | + } | |
13399 | + if (position == -1) { | |
13400 | + LOG_SERIOUS("%s: child not found from '%s'\n", | |
13401 | + __FUNCTION__, node->name); | |
13402 | + } | |
13403 | + return (position); | |
13404 | +} | |
13405 | + | |
13406 | +/** | |
13407 | + * process_child_nodes: perform the discovery operation on each child node | |
13408 | + * @discover_list: the list of potential child objects | |
13409 | + * | |
13410 | + * search the discovery list of drivelink child nodes. for each node found, | |
13411 | + * remove it from the list, load its metadata, and record it in its parent's child table. | |
13412 | + * | |
13413 | + * Return value: always 0. a child that fails discovery is deleted here | |
13414 | + * and is later seen as a missing child of its parent. | |
13415 | +**/ | |
13416 | +static int | |
13417 | +process_child_nodes(struct evms_logical_node **discover_list) | |
13418 | +{ | |
13419 | + int rc = 0, index = -1; | |
13420 | + struct evms_logical_node *node, *next_node, *parent; | |
13421 | + struct evms_drivelink_metadata *md; | |
13422 | + struct runtime_data *rd; | |
13423 | + struct runtime_entry *child_entry = NULL; | |
13424 | + | |
13425 | + for (node = *discover_list; node; node = next_node) { | |
13426 | + next_node = node->next; | |
13427 | + if ((!node->feature_header) || | |
13428 | + (node->feature_header->feature_id != plugin_header.id)) { | |
13429 | + continue; | |
13430 | + } | |
13431 | + | |
13432 | + rc = evms_cs_remove_logical_node_from_list(discover_list, node); | |
13433 | + if (rc) | |
13434 | + BUG(); | |
13435 | + /* we need to load the feature data to */ | |
13436 | + /* find the parent's serial number this */ | |
13437 | + /* child node belongs to. */ | |
13438 | + md = NULL; | |
13439 | + rc = load_feature_data(node, &md); | |
13440 | + if (!rc) { | |
13441 | + /* find the parent node for this child */ | |
13442 | + parent = NULL; | |
13443 | + rc = find_parent_node_for_child_node(node, md, | |
13444 | + &parent, &rd, | |
13445 | + discover_list); | |
13446 | + } | |
13447 | + if (!rc) { | |
13448 | + /* determine position of child in drive link object */ | |
13449 | + index = compute_child_index(node, md); | |
13450 | + if ((index == -1) || (index >= rd->child_count)) | |
13451 | + rc = -1; /* not in the ordering table, or beyond the parent's child table (which may have been sized from another child's metadata) */ | |
13452 | + } | |
13453 | + if (!rc) { | |
13454 | + /* check for multiple child index requests */ | |
13455 | + child_entry = | |
13456 | + (struct runtime_entry *) &rd->child_table[index]; | |
13457 | + /* check to see if this child index is | |
13458 | + * already in use. | |
13459 | + */ | |
13460 | + if (child_entry->child_node) { | |
13461 | + LOG_SERIOUS | |
13462 | + ("attempt to put '%s' in child index(%d). Already occupied by '%s'.\n", | |
13463 | + node->name, index, | |
13464 | + child_entry->child_node->name); | |
13465 | + rc = -1; | |
13466 | + } | |
13467 | + } | |
13468 | + if (!rc) { | |
13469 | + /* fill in child info in parent */ | |
13470 | + | |
13471 | + /* check the sector size for this node */ | |
13472 | + if (node->hardsector_size > parent->hardsector_size) | |
13473 | + parent->hardsector_size = node->hardsector_size; | |
13474 | + /* check the block size for this node */ | |
13475 | + if (node->block_size > parent->block_size) | |
13476 | + parent->block_size = node->block_size; | |
13477 | + /* set the child node */ | |
13478 | + child_entry->child_node = node; | |
13479 | + /* set the metadata for this node */ | |
13480 | + child_entry->child_metadata = md; | |
13481 | + } | |
13482 | + | |
13483 | + /* on error, clean up accordingly */ | |
13484 | + if (rc) { | |
13485 | + if (md) | |
13486 | + kfree(md); | |
13487 | + LOG_SERIOUS("%s: rc(%d) from '%s'\n", | |
13488 | + __FUNCTION__, rc, node->name); | |
13489 | + LOG_SERIOUS("deleting child node '%s'.\n", node->name); | |
13490 | + rc = DELETE(node); | |
13491 | + if (rc) { | |
13492 | + LOG_SERIOUS | |
13493 | + ("error(%d) attempting to delete '%s'.\n", | |
13494 | + rc, node->name); | |
13495 | + } | |
13496 | + } | |
13497 | + } | |
13498 | + | |
13499 | + /* errors are handled internal to this function */ | |
13500 | + /* by deleting the failed node. This will get */ | |
13501 | + /* picked up by finalize_parent_nodes as a */ | |
13502 | + /* missing child node */ | |
13503 | + return (0); | |
13504 | +} | |
13505 | + | |
13506 | +#define TEST_CHILD_PRESENCE 0 /* NOTE(review): the TEST_CHILD_* ids are not referenced in the code visible here; the names mirror the order of the checks in test_parent_node() - confirm use */ | |
13507 | +#define TEST_CHILD_COUNT 1 /* cf. the child_count comparison */ | |
13508 | +#define TEST_CHILD_PARENTS_SERIAL_NUM 2 /* cf. the parent_serial_number comparison */ | |
13509 | +#define TEST_CHILD_POSITION 3 /* cf. the ordering_table[i] serial number comparison */ | |
13510 | +#define TEST_CHILD_METADATA 4 /* cf. the ordering table memcmp */ | |
13511 | + | |
13512 | +/** | |
13513 | + * test_parent_node: verify that a parent is complete and consistent | |
13514 | + * @node: specified parent node; node->private is read as its struct runtime_data | |
13515 | + * | |
13516 | + * A missing child only flags @node read-only and partial. A present child whose child count, parent serial number, table position or ordering table disagrees is fatal and stops the scan. | |
13517 | + * | |
13518 | + * Return value: 0 on success (a parent with missing children still returns 0) | |
13519 | + * -EVMS_FEATURE_FATAL_ERROR when a present child is inconsistent | |
13520 | +**/ | |
13521 | +static int | |
13522 | +test_parent_node(struct evms_logical_node *node) | |
13523 | +{ | |
13524 | + int i, rc = 0; | |
13525 | + struct runtime_data *rd; | |
13526 | + struct runtime_entry *child_entry; | |
13527 | + | |
13528 | + rd = (struct runtime_data *) node->private; | |
13529 | + for (i = 0; i < rd->child_count; i++) { | |
13530 | + child_entry = (struct runtime_entry *) &rd->child_table[i]; | |
13531 | + | |
13532 | + /* ensure each child entry is filled; a hole is not fatal */ | |
13533 | + if (!child_entry->child_node) { | |
13534 | + node->flags |= | |
13535 | + EVMS_VOLUME_SET_READ_ONLY | EVMS_VOLUME_PARTIAL; | |
13536 | + LOG_ERROR("%s: missing child(%d).\n", __FUNCTION__, i); | |
13537 | + } else | |
13538 | + /* ensure child count is the same */ | |
13539 | + /* in each child's metadata */ | |
13540 | + if (child_entry->child_metadata->child_count != rd->child_count) { | |
13541 | + rc = -EVMS_FEATURE_FATAL_ERROR; | |
13542 | + LOG_ERROR("%s: child count wrong for node '%s'\n", | |
13543 | + __FUNCTION__, node->name); | |
13544 | + } else | |
13545 | + /* ensure parent serial number is */ | |
13546 | + /* the same in each child's metadata */ | |
13547 | + if (child_entry->child_metadata->parent_serial_number != | |
13548 | + rd->parent_sn) { | |
13549 | + rc = -EVMS_FEATURE_FATAL_ERROR; | |
13550 | + LOG_ERROR | |
13551 | + ("%s: incorrect [is("PFU64"), should be("PFU64")] child serial number for node '%s'\n", | |
13552 | + __FUNCTION__, | |
13553 | + child_entry->child_metadata->parent_serial_number, | |
13554 | + rd->parent_sn, node->name); | |
13555 | + } else | |
13556 | + /* ensure each child sits in the slot its own ordering table assigns */ | |
13557 | + if (child_entry->child_metadata->ordering_table[i]. | |
13558 | + child_serial_number != | |
13559 | + child_entry->child_metadata->child_serial_number) { | |
13560 | + rc = -EVMS_FEATURE_FATAL_ERROR; | |
13561 | + LOG_ERROR | |
13562 | + ("%s: child reports different index for node '%s'\n", | |
13563 | + __FUNCTION__, node->name); | |
13564 | + } else { | |
13565 | + struct runtime_entry *other_child_entry; | |
13566 | + int j, rc2; | |
13567 | + /* compare the children's metadata */ | |
13568 | + | |
13569 | + /* look for another present child to | |
13570 | + * compare against. | |
13571 | + */ | |
13572 | + other_child_entry = NULL; | |
13573 | + for (j = 0; j < rd->child_count; j++) { | |
13574 | + /* skip comparing to ourselves */ | |
13575 | + if (j == i) { | |
13576 | + continue; | |
13577 | + } | |
13578 | + /* is this child present? */ | |
13579 | + if (rd->child_table[j].child_node) { | |
13580 | + /* yes, use it */ | |
13581 | + other_child_entry = &rd->child_table[j]; | |
13582 | + break; | |
13583 | + } | |
13584 | + } | |
13585 | + /* if we can't find another valid | |
13586 | + * child node's metadata to compare | |
13587 | + * against, just skip this test. | |
13588 | + */ | |
13589 | + if (!other_child_entry) { | |
13590 | + continue; | |
13591 | + } | |
13592 | + rc2 = | |
13593 | + memcmp(other_child_entry->child_metadata-> | |
13594 | + ordering_table, | |
13595 | + child_entry->child_metadata->ordering_table, | |
13596 | + sizeof (child_entry->child_metadata-> | |
13597 | + ordering_table)); | |
13598 | + if (rc2) { | |
13599 | + rc = -EVMS_FEATURE_FATAL_ERROR; | |
13600 | + LOG_ERROR | |
13601 | + ("%s: mismatching child metadata for nodes '%s' and '%s'\n", | |
13602 | + __FUNCTION__, | |
13603 | + other_child_entry->child_node->name, /* name the peer actually compared; slot i - 1 may be absent or out of range */ | |
13604 | + child_entry->child_node->name); | |
13605 | + } | |
13606 | + } | |
13607 | + /* stop if fatal error encountered */ | |
13608 | + if (rc == -EVMS_FEATURE_FATAL_ERROR) { | |
13609 | + break; | |
13610 | + } | |
13611 | + } | |
13612 | + return (rc); | |
13613 | +} | |
13614 | + | |
13615 | +/** | |
13616 | + * perform_final_adjustments: do final tweaks to parent node | |
13617 | + * @node: parent node; node->private is read as its struct runtime_data | |
13618 | + * | |
13619 | + * This function does the following: | |
13620 | + * sets the vsize field of each child entry from one reference ordering table | |
13621 | + * sets the voffset field of each child entry to the parent's running total | |
13622 | + * frees each child's metadata and each present child's feature header | |
13623 | + * adds every vsize to the parent's total_vsectors (which is not reset here) | |
13624 | +**/ | |
13625 | +static void | |
13626 | +perform_final_adjustments(struct evms_logical_node *node) | |
13627 | +{ | |
13628 | + int i; | |
13629 | + struct runtime_data *rd; | |
13630 | + struct runtime_entry *child_entry = NULL; | |
13631 | + struct evms_drivelink_metadata *ref_data = NULL; | |
13632 | + | |
13633 | + rd = (struct runtime_data *) node->private; | |
13634 | + /* find a valid copy of the ordering table. | |
13635 | + * since all the ordering tables are the same | |
13636 | + * we can just pick one to use for all the | |
13637 | + * child computations: the first present child's. | |
13638 | + */ | |
13639 | + for (i = 0; i < rd->child_count; i++) { | |
13640 | + child_entry = (struct runtime_entry *) &rd->child_table[i]; | |
13641 | + if (child_entry->child_node) { | |
13642 | + ref_data = child_entry->child_metadata; | |
13643 | + break; | |
13644 | + } | |
13645 | + } | |
13646 | + /* if we got this far, there should | |
13647 | + * always be at least one valid child. | |
13648 | + */ | |
13649 | + if (!ref_data) | |
13650 | + BUG(); | |
13651 | + /* compute the parent's usable size, | |
13652 | + * and construct the table used to | |
13653 | + * remap parent I/Os to child I/Os */ | |
13654 | + for (i = 0; i < rd->child_count; i++) { | |
13655 | + child_entry = (struct runtime_entry *) &rd->child_table[i]; | |
13656 | + /* set the LBA count for this child entry, present or not */ | |
13657 | + child_entry->vsize = ref_data->ordering_table[i].child_vsize; | |
13658 | + /* set the start LBA value for this child node */ | |
13659 | + child_entry->voffset = node->total_vsectors; | |
13660 | + /* keep a running total of size in sectors */ | |
13661 | + node->total_vsectors += child_entry->vsize; | |
13662 | + /* free this child's metadata; the reference copy is still read by later iterations and is freed after the loop */ | |
13663 | + if (ref_data != child_entry->child_metadata) { | |
13664 | + kfree(child_entry->child_metadata); | |
13665 | + } | |
13666 | + child_entry->child_metadata = NULL; | |
13667 | + /* free the feature header for this child node */ | |
13668 | + if (child_entry->child_node) { | |
13669 | + kfree(child_entry->child_node->feature_header); | |
13670 | + child_entry->child_node->feature_header = NULL; | |
13671 | + } | |
13672 | + } | |
13673 | + /* free the reference data */ | |
13674 | + kfree(ref_data); | |
13675 | +} | |
13676 | + | |
13677 | +/** | |
13678 | + * finalize_parent_nodes: verify and prepare parent nodes | |
13679 | + * @discover_list: list of potential drivelink parent objects | |
13680 | + * | |
13681 | + * verify the completeness of each parent node. if the test fails, remove the node | |
13682 | + * from the list and DELETE it. If it passes, perform final tweaks | |
13683 | + * to allow this node to be usable. | |
13684 | + * | |
13685 | + * Return value: the test_parent_node() result of the last parent node visited | |
13686 | + * (0 if the list holds no parent node); DELETE errors are only logged | |
13687 | +**/ | |
13688 | +static int | |
13689 | +finalize_parent_nodes(struct evms_logical_node **discover_list) | |
13690 | +{ | |
13691 | + int rc = 0, rc2; | |
13692 | + struct evms_logical_node *node, *next_node; | |
13693 | + | |
13694 | + for (node = *discover_list; node; node = next_node) { | |
13695 | + next_node = node->next; /* fetched up front: node may be removed and deleted below */ | |
13696 | + /* only check parent nodes: those without a feature header */ | |
13697 | + if (!node->feature_header) { | |
13698 | + /* validate the children of this parent */ | |
13699 | + rc = test_parent_node(node); | |
13700 | + if (!rc) { | |
13701 | + /* compute parent size and | |
13702 | + * child remap table. | |
13703 | + */ | |
13704 | + perform_final_adjustments(node); | |
13705 | + } else { | |
13706 | + /* fatal error encountered. | |
13707 | + * cleanup from this node and | |
13708 | + * delete it from memory. | |
13709 | + */ | |
13710 | + evms_cs_remove_logical_node_from_list | |
13711 | + (discover_list, node); | |
13712 | + rc2 = DELETE(node); | |
13713 | + if (rc2) { | |
13714 | + LOG_SERIOUS | |
13715 | + ("error(%d) attempting to delete '%s'.\n", | |
13716 | + rc2, node->name); | |
13717 | + } | |
13718 | + } | |
13719 | + } | |
13720 | + } | |
13721 | + return (rc); | |
13722 | +} | |
13723 | + | |
13724 | +/** | |
13725 | + * drivelink_discover: discover drivelinked storage objects | |
13726 | + * @discover_list: the list of objects to inspect | |
13727 | + * | |
13728 | + * runs process_child_nodes() and, only if that returns 0, finalize_parent_nodes() on the discovery list | |
13729 | + * | |
13730 | + * Return value: 0 on success | |
13731 | + * otherwise the error code of whichever of the two steps failed | |
13732 | +**/ | |
13733 | +static int | |
13734 | +drivelink_discover(struct evms_logical_node **discover_list) | |
13735 | +{ | |
13736 | + int rc = 0; | |
13737 | + | |
13738 | + MOD_INC_USE_COUNT; /* paired with the MOD_DEC_USE_COUNT before the return */ | |
13739 | + rc = process_child_nodes(discover_list); | |
13740 | + if (!rc) | |
13741 | + rc = finalize_parent_nodes(discover_list); | |
13742 | + | |
13743 | + MOD_DEC_USE_COUNT; | |
13744 | + return (rc); | |
13745 | +} | |
13746 | + | |
13747 | +/********************************************************/ | |
13748 | +/* Required Plugin Function Table Entry Point: */ | |
13749 | +/* Delete function */ | |
13750 | +/********************************************************/ | |
13751 | + | |
13752 | +/** | |
13753 | + * drivelink_delete: purges a drivelink object and its children from memory | |
13754 | + * @node: the drivelink object to delete | |
13755 | + * | |
13756 | + * purge the drivelink object, its private data, and all its children from memory. | |
13757 | + * | |
13758 | + * Return value: 0 on success | |
13759 | + * otherwise the rc of the first child DELETE that failed; @node and its private data are then left allocated | |
13760 | +**/ | |
13761 | +static int | |
13762 | +drivelink_delete(struct evms_logical_node *node) | |
13763 | +{ | |
13764 | + int i, rc = 0; | |
13765 | + struct runtime_data *rd; | |
13766 | + struct runtime_entry *child_entry; | |
13767 | + | |
13768 | + LOG_DETAILS("deleting '%s'.\n", node->name); | |
13769 | + | |
13770 | + rd = (struct runtime_data *) node->private; | |
13771 | + if (rd) { | |
13772 | + for (i = 0; i < rd->child_count; i++) { | |
13773 | + child_entry = &rd->child_table[i]; | |
13774 | + /* delete the child node; stop at the first failure */ | |
13775 | + if (child_entry->child_node) { | |
13776 | + rc = DELETE(child_entry->child_node); | |
13777 | + if (rc) | |
13778 | + break; | |
13779 | + child_entry->child_node = NULL; | |
13780 | + } | |
13781 | + /* delete the child's metadata */ | |
13782 | + if (child_entry->child_metadata) { | |
13783 | + kfree(child_entry->child_metadata); | |
13784 | + child_entry->child_metadata = NULL; | |
13785 | + } | |
13786 | + } | |
13787 | + if (!rc) { | |
13788 | + /* delete the child table */ | |
13789 | + if (rd->child_table) { | |
13790 | + kfree(rd->child_table); | |
13791 | + rd->child_table = NULL; | |
13792 | + } | |
13793 | + /* delete the instance data */ | |
13794 | + kfree(rd); | |
13795 | + node->private = NULL; | |
13796 | + } | |
13797 | + } | |
13798 | + if (!rc) { | |
13799 | + evms_cs_deallocate_logical_node(node); | |
13800 | + MOD_DEC_USE_COUNT; /* NOTE(review): presumably balances a use count taken when the node was created - confirm */ | |
13801 | + } | |
13802 | + | |
13803 | + return (rc); | |
13804 | +} | |
13805 | + | |
13806 | +/** | |
13807 | + * which_child: find the child node targeted by an IO to this drivelink object | |
13808 | + * @parent: parent drivelink object | |
13809 | + * @rsector: in: sector relative to the parent; out: sector relative to the child | |
13810 | + * @max_io_sects: optional (may be NULL); out: sectors left on the child from rsector | |
13811 | + * | |
13812 | + * This function finds the child entry a parent rsector maps to by | |
13813 | + * subtracting each entry's vsize in table order. It then leaves the | |
13814 | + * child relative sector in *rsector and optionally computes the max # | |
13815 | + * of sectors that can be accessed from this starting point on the child. | |
13816 | + * | |
13817 | + * Return value: | |
13818 | + * The child node of the matching entry. NULL is returned when that entry | |
13819 | + * has no child node, or when rsector lies beyond the last entry (in that | |
13820 | + * case *max_io_sects is left untouched). | |
13821 | +**/ | |
13822 | +static struct evms_logical_node * | |
13823 | +which_child(struct evms_logical_node *parent, | |
13824 | + u64 * rsector, u64 * max_io_sects) | |
13825 | +{ | |
13826 | + int i; | |
13827 | + struct evms_logical_node *child = NULL; | |
13828 | + struct runtime_data *rd; | |
13829 | + struct runtime_entry *child_entry = NULL; | |
13830 | + | |
13831 | + rd = (struct runtime_data *) parent->private; | |
13832 | + for (i = 0; i < rd->child_count; i++) { | |
13833 | + child_entry = (struct runtime_entry *) &rd->child_table[i]; | |
13834 | + | |
13835 | + if (*rsector >= child_entry->vsize) { | |
13836 | + *rsector -= child_entry->vsize; /* past this entry: rebase onto the next one */ | |
13837 | + } else { | |
13838 | + /* get the child node (may be NULL for an empty entry) */ | |
13839 | + child = child_entry->child_node; | |
13840 | + /* compute the sector count if requested */ | |
13841 | + if (max_io_sects) | |
13842 | + /* this is only used for INIT I/O | |
13843 | + * to return the largest sector | |
13844 | + * count size for this child based | |
13845 | + * on first sector in the I/O. | |
13846 | + */ | |
13847 | + *max_io_sects = child_entry->vsize - *rsector; | |
13848 | + break; | |
13849 | + } | |
13850 | + } | |
13851 | + return (child); | |
13852 | +} | |
13853 | + | |
13854 | +/** | |
13855 | + * drivelink_io_error: log an IO error for drivelink and fail the buffer head | |
13856 | + * @node: drivelink object | |
13857 | + * @io_flag: non-zero is logged as "WRITE", zero as "READ" | |
13858 | + * @bh: buffer head targeting this object; completed via b_end_io(bh, 0) | |
13859 | + * | |
13860 | + * this function was primarily created because buffer_IO_error is inline | |
13861 | + * and kgdb doesn't allow breakpoints to be set on inline functions. Since | |
13862 | + * this was an error path and not mainline, a trace statement was added to | |
13863 | + * help report on the failing condition. | |
13864 | +**/ | |
13865 | +static void | |
13866 | +drivelink_io_error(struct evms_logical_node *node, int io_flag, struct buffer_head *bh) | |
13867 | +{ | |
13868 | + LOG_SERIOUS("%s error on '%s' remapping rsector("PFU64").\n", | |
13869 | + (io_flag) ? "WRITE" : "READ", | |
13870 | + node->name, (u64) bh->b_rsector); | |
13871 | + | |
13872 | + bh->b_end_io(bh, 0); | |
13873 | +} | |
13874 | + | |
13875 | +/********************************************************/ | |
13876 | +/* Required Plugin Function Table Entry Point: */ | |
13877 | +/* Read function & Support routines */ | |
13878 | +/********************************************************/ | |
13879 | + | |
13880 | +/** | |
13881 | + * drivelink_read: handles IO read operations to drivelink objects | |
13882 | + * @node: drivelink object | |
13883 | + * @bh: buffer head targeting this object | |
13884 | + * | |
13885 | + * remaps the drivelink relative request to a child relative request and | |
13886 | + * routes it to that child. a request that maps to no child, or that is | |
13887 | + * larger than what remains of the child, is failed via drivelink_io_error(). | |
13888 | +**/ | |
13889 | +static void | |
13890 | +drivelink_read(struct evms_logical_node *node, struct buffer_head *bh) | |
13891 | +{ | |
13892 | + struct evms_logical_node *child; | |
13893 | + u64 io_size, rsector; | |
13894 | + | |
13895 | + rsector = bh->b_rsector; | |
13896 | + child = which_child(node, &rsector, &io_size); | |
13897 | + if (child && ((bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= io_size)) { /* io_size is only read when a child was found */ | |
13898 | + bh->b_rsector = rsector; | |
13899 | + R_IO(child, bh); | |
13900 | + } else { | |
13901 | + drivelink_io_error(node, READ, bh); | |
13902 | + } | |
13903 | +} | |
13904 | + | |
13905 | +/********************************************************/ | |
13906 | +/* Required Plugin Function Table Entry Point: */ | |
13907 | +/* Write function & Support routines */ | |
13908 | +/********************************************************/ | |
13909 | + | |
13910 | +/** | |
13911 | + * drivelink_write: handles IO write operations to drivelink objects | |
13912 | + * @node: drivelink object | |
13913 | + * @bh: buffer head targeting this object | |
13914 | + * | |
13915 | + * remaps the drivelink relative request to a child relative request and | |
13916 | + * routes it to that child. a request that maps to no child, or that is | |
13917 | + * larger than what remains of the child, is failed via drivelink_io_error(). | |
13918 | +**/ | |
13919 | +static void | |
13920 | +drivelink_write(struct evms_logical_node *node, struct buffer_head *bh) | |
13921 | +{ | |
13922 | + struct evms_logical_node *child; | |
13923 | + u64 io_size, rsector; | |
13924 | + | |
13925 | + rsector = bh->b_rsector; | |
13926 | + child = which_child(node, &rsector, &io_size); | |
13927 | + if (child && ((bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= io_size)) { /* io_size is only read when a child was found */ | |
13928 | + bh->b_rsector = rsector; | |
13929 | + W_IO(child, bh); | |
13930 | + } else { | |
13931 | + drivelink_io_error(node, WRITE, bh); | |
13932 | + } | |
13933 | +} | |
13934 | + | |
13935 | +/********************************************************/ | |
13936 | +/* Required Plugin Function Table Entry Point: */ | |
13937 | +/* Init I/O function */ | |
13938 | +/********************************************************/ | |
13939 | + | |
13940 | +/** | |
13941 | + * drivelink_init_io: performs synchronous IO to drivelink objects | |
13942 | + * @node: drivelink object | |
13943 | + * @io_flag: read/write flag (compared against READ; non-zero is logged as WRITE) | |
13944 | + * @sect_nr: starting sector, object relative (512 byte units) | |
13945 | + * @num_sects: count of sectors | |
13946 | + * @buf_addr: buffer address to read from/write to | |
13947 | + * | |
13948 | + * The request is split at child boundaries using which_child() and each | |
13949 | + * piece is passed to that child's INIT_IO. A piece that maps to a missing | |
13950 | + * child is zero filled on READ and skipped otherwise. | |
13951 | + * | |
13952 | + * Return value: 0 on success, -EINVAL for a NULL node or an out of bound | |
13953 | + * request, otherwise the first non-zero rc from a child's INIT_IO | |
13954 | +**/ | |
13955 | +static int | |
13956 | +drivelink_init_io(struct evms_logical_node *node, int io_flag, | |
13957 | + u64 sect_nr, | |
13958 | + u64 num_sects, | |
13959 | + void *buf_addr) | |
13960 | +{ | |
13961 | + int rc = 0; | |
13962 | + | |
13963 | + if (!node) | |
13964 | + rc = -EINVAL; | |
13965 | + else { | |
13966 | + u64 starting_sector, remaining_sectors; | |
13967 | + void *io_buf; | |
13968 | + struct runtime_data *rd; | |
13969 | + | |
13970 | + if ((sect_nr + num_sects) > node->total_vsectors) { | |
13971 | + LOG_SERIOUS | |
13972 | + ("attempted out of bound("PFU64") %s on '%s' at sector("PFU64"), count("PFU64").\n", | |
13973 | + node->total_vsectors, (io_flag) ? "WRITE" : "READ", | |
13974 | + node->name, sect_nr, num_sects); | |
13975 | + rc = -EINVAL; | |
13976 | + } else { | |
13977 | + rd = (struct runtime_data *) node->private; /* NOTE(review): rd is not read again in this function */ | |
13978 | + /* make working copies of input parameters */ | |
13979 | + starting_sector = sect_nr; | |
13980 | + remaining_sectors = num_sects; | |
13981 | + io_buf = buf_addr; | |
13982 | + /* loop until all I/O is performed */ | |
13983 | + while (remaining_sectors) { | |
13984 | + u64 io_start, io_size; | |
13985 | + struct evms_logical_node *child; | |
13986 | + | |
13987 | + /* compute the child relative io_start | |
13988 | + * and max io_size. | |
13989 | + */ | |
13990 | + io_start = starting_sector; | |
13991 | + child = which_child(node, &io_start, &io_size); | |
13992 | + /* clamp io_size to the sectors | |
13993 | + * still remaining in this io, so | |
13994 | + * a piece never crosses into the next child. | |
13995 | + */ | |
13996 | + if (io_size > remaining_sectors) | |
13997 | + io_size = remaining_sectors; | |
13998 | + if (child) { | |
13999 | + rc = INIT_IO(child, | |
14000 | + io_flag, | |
14001 | + io_start, io_size, io_buf); | |
14002 | + } else { | |
14003 | + /* if partial volume, return 0's for missing | |
14004 | + * children; a WRITE is skipped with rc unchanged. | |
14005 | + */ | |
14006 | + if (io_flag == READ) { | |
14007 | + memset(io_buf, 0, | |
14008 | + io_size << | |
14009 | + EVMS_VSECTOR_SIZE_SHIFT); | |
14010 | + } | |
14011 | + } | |
14012 | + if (!rc) { | |
14013 | + /* adjust working copies */ | |
14014 | + starting_sector += io_size; | |
14015 | + remaining_sectors -= io_size; | |
14016 | + io_buf += io_size << | |
14017 | + EVMS_VSECTOR_SIZE_SHIFT; | |
14018 | + } else | |
14019 | + break; | |
14020 | + } | |
14021 | + } | |
14022 | + } | |
14023 | + | |
14024 | + return (rc); | |
14025 | +} | |
14026 | + | |
14027 | +/********************************************************/ | |
14028 | +/* Required Plugin Function Table Entry Point: */ | |
14029 | +/* IOCTL function & Support routines */ | |
14030 | +/********************************************************/ | |
14031 | + | |
14032 | +/** | |
14033 | + * drivelink_ioctl_cmd_plugin_ioctl: drivelink support for the 'plugin ioctl' command | |
14034 | + * @node: drivelink object | |
14035 | + * @inode: VFS supplied parameter | |
14036 | + * @file: VFS supplied parameter | |
14037 | + * @cmd: the specific ioctl command | |
14038 | + * @arg: user space address of a struct evms_plugin_ioctl_pkt | |
14039 | + * | |
14040 | + * this function handles 'plugin ioctl' commands. currently there are no specific | |
14041 | + * commands for this plugin. however, a packet addressed to another feature id is | |
14042 | + * passed to each present child in turn, stopping at the first child that fails. | |
14043 | + * | |
14044 | + * Return value: 0 on success | |
14045 | + * -EFAULT if a user copy fails, otherwise the failing child's rc | |
14046 | +**/ | |
14047 | +static int | |
14048 | +drivelink_ioctl_cmd_plugin_ioctl(struct evms_logical_node *node, | |
14049 | + struct inode *inode, struct file *file, | |
14050 | + unsigned long cmd, unsigned long arg) | |
14051 | +{ | |
14052 | + int i, rc = 0; | |
14053 | + struct runtime_data *rd; | |
14054 | + struct evms_plugin_ioctl_pkt tmp, *user_parms; | |
14055 | + | |
14056 | + user_parms = (struct evms_plugin_ioctl_pkt *) arg; | |
14057 | + /* copy user's parameters to kernel space */ | |
14058 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) | |
14059 | + rc = -EFAULT; | |
14060 | + | |
14061 | + if (!rc) { | |
14062 | + rd = (struct runtime_data *) node->private; | |
14063 | + /* is this cmd targeted at this feature ? */ | |
14064 | + if (tmp.feature_id == node->plugin->id) { | |
14065 | + switch (tmp.feature_command) { | |
14066 | + default: /* no feature specific commands are handled here */ | |
14067 | + break; | |
14068 | + } | |
14069 | + } else { /* broadcast this cmd to all children */ | |
14070 | + for (i = 0; i < rd->child_count; i++) { | |
14071 | + struct evms_logical_node *child_node; | |
14072 | + | |
14073 | + child_node = rd->child_table[i].child_node; | |
14074 | + if (child_node) { | |
14075 | + rc = IOCTL(child_node, inode, file, | |
14076 | + cmd, arg); | |
14077 | + if (rc) | |
14078 | + break; | |
14079 | + } | |
14080 | + } | |
14081 | + } | |
14082 | + /* copy info to userspace. NOTE(review): tmp is never modified above, so this writes the original packet back and could overwrite anything a child stored through arg - confirm intent */ | |
14083 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp))) | |
14084 | + rc = -EFAULT; | |
14085 | + } | |
14086 | + return (rc); | |
14087 | +} | |
14088 | + | |
14089 | +/** | |
14090 | + * drivelink_ioctl_cmd_broadcast: broadcast ioctls to your children | |
14091 | + * @node: drivelink object | |
14092 | + * @inode: VFS supplied parameter | |
14093 | + * @file: VFS supplied parameter | |
14094 | + * @cmd: the specific ioctl command | |
14095 | + * @arg: the specific ioctl arguments | |
14096 | + * | |
14097 | + * broadcast the specified ioctl command and arguments to all of this object's | |
14098 | + * present children. OR (bitwise operation) the return values from all the | |
14099 | + * children and return the OR'd value to the caller. no child is skipped on error. | |
14100 | + * | |
14101 | + * Return value: 0 if every child returned 0 | |
14102 | + * otherwise the bitwise OR of the children's return values | |
14103 | +**/ | |
14104 | +static int | |
14105 | +drivelink_ioctl_cmd_broadcast(struct evms_logical_node *node, | |
14106 | + struct inode *inode, struct file *file, | |
14107 | + unsigned long cmd, unsigned long arg) | |
14108 | +{ | |
14109 | + int i, rc = 0; | |
14110 | + struct runtime_data *rd; | |
14111 | + | |
14112 | + rd = (struct runtime_data *) node->private; | |
14113 | + /* broadcast this cmd to all children */ | |
14114 | + for (i = 0; i < rd->child_count; i++) { | |
14115 | + struct evms_logical_node *child_node; | |
14116 | + | |
14117 | + child_node = rd->child_table[i].child_node; | |
14118 | + if (child_node) { | |
14119 | + rc |= IOCTL(child_node, inode, file, cmd, arg); | |
14120 | + } | |
14121 | + } | |
14122 | + return (rc); | |
14123 | +} | |
14124 | + | |
14125 | +/** | |
14126 | + * drivelink_ioctl: main ioctl entry point and handler | |
14127 | + * @node: drivelink object | |
14128 | + * @inode: VFS supplied parameter | |
14129 | + * @file: VFS supplied parameter | |
14130 | + * @cmd: a specific ioctl command | |
14131 | + * @arg: a specific ioctl argument | |
14132 | + * | |
14133 | + * handles HDIO_GETGEO and EVMS_GET_BMAP internally and routes the other known | |
14134 | + * ioctl commands to the broadcast or plugin ioctl helpers. | |
14135 | + * | |
14136 | + * Returns: 0 on success | |
14137 | + * -EINVAL for a NULL node/inode or an unknown cmd, otherwise error code | |
14138 | + **/ | |
14139 | +static int | |
14140 | +drivelink_ioctl(struct evms_logical_node *node, | |
14141 | + struct inode *inode, | |
14142 | + struct file *file, unsigned int cmd, unsigned long arg) | |
14143 | +{ | |
14144 | + int rc = 0; | |
14145 | + struct runtime_data *rd = NULL; | |
14146 | + struct hd_geometry hdgeo; | |
14147 | + | |
14148 | + if ((!node) || (!inode)) | |
14149 | + rc = -EINVAL; | |
14150 | + | |
14151 | + if (!rc) { | |
14152 | + rd = (struct runtime_data *) node->private; /* NOTE(review): rd is not read again in this function */ | |
14153 | + switch (cmd) { | |
14154 | + case HDIO_GETGEO: | |
14155 | + hdgeo.heads = 255; /* heads and sectors are hard-coded; cylinders is derived from the size */ | |
14156 | + hdgeo.sectors = 63; | |
14157 | + hdgeo.cylinders = | |
14158 | + ((unsigned int) node->total_vsectors) / /* the cast drops any bits of total_vsectors above unsigned int */ | |
14159 | + hdgeo.heads / hdgeo.sectors; | |
14160 | + hdgeo.start = 0; | |
14161 | + if (copy_to_user((int *) arg, &hdgeo, sizeof (hdgeo))) | |
14162 | + rc = -EFAULT; | |
14163 | + break; | |
14164 | + case EVMS_QUIESCE_VOLUME: | |
14165 | + case EVMS_GET_DISK_LIST: | |
14166 | + case EVMS_CHECK_MEDIA_CHANGE: | |
14167 | + case EVMS_REVALIDATE_DISK: | |
14168 | + case EVMS_OPEN_VOLUME: | |
14169 | + case EVMS_CLOSE_VOLUME: | |
14170 | + case EVMS_CHECK_DEVICE_STATUS: | |
14171 | + rc = drivelink_ioctl_cmd_broadcast(node, inode, file, | |
14172 | + cmd, arg); | |
14173 | + break; | |
14174 | + case EVMS_PLUGIN_IOCTL: | |
14175 | + rc = drivelink_ioctl_cmd_plugin_ioctl(node, inode, file, | |
14176 | + cmd, arg); | |
14177 | + break; | |
14178 | + case EVMS_GET_BMAP: | |
14179 | + { | |
14180 | + struct evms_get_bmap_pkt *bmap; | |
14181 | + u64 io_start, io_size; | |
14182 | + struct evms_logical_node *child; | |
14183 | + | |
14184 | + bmap = (struct evms_get_bmap_pkt *) arg; /* NOTE(review): arg is dereferenced directly, without copy_from_user - presumably a kernel pointer; confirm */ | |
14185 | + io_start = bmap->rsector; | |
14186 | + child = which_child(node, &io_start, &io_size); | |
14187 | + if (child) { /* with no child, rc stays 0 and bmap is left unchanged */ | |
14188 | + if (node->block_size != | |
14189 | + child->block_size) { | |
14190 | + bmap->status = -EPERM; | |
14191 | + } else { | |
14192 | + bmap->rsector = io_start; /* hand the child its own relative sector */ | |
14193 | + rc = IOCTL(child, | |
14194 | + inode, | |
14195 | + file, cmd, arg); | |
14196 | + } | |
14197 | + } | |
14198 | + } | |
14199 | + break; | |
14200 | + default: | |
14201 | + rc = -EINVAL; | |
14202 | + break; | |
14203 | + } | |
14204 | + } | |
14205 | + return (rc); | |
14206 | +} | |
14207 | + | |
14208 | +/********************************************************/ | |
14209 | +/* Required Module Entry Point: */ | |
14210 | +/* drivelink_init */ | |
14211 | +/********************************************************/ | |
14212 | + | |
14213 | +/** | |
14214 | + * drivelink_init: register this module for use within the EVMS framework | |
14215 | + * | |
14216 | + * Return value: whatever evms_cs_register_plugin() returns for plugin_header | |
14217 | + * (presumably 0 on success, otherwise error code). | |
14218 | +**/ | |
14219 | +int __init | |
14220 | +drivelink_init(void) | |
14221 | +{ | |
14222 | + return evms_cs_register_plugin(&plugin_header); | |
14223 | +} | |
14224 | + | |
14225 | +/** | |
14226 | + * drivelink_exit: unregister this module from use within the EVMS framework | |
14227 | + * | |
14228 | + * No return value; the result of evms_cs_unregister_plugin(), if any, | |
14229 | + * is not checked. | |
14230 | +**/ | |
14231 | +void __exit | |
14232 | +drivelink_exit(void) | |
14233 | +{ | |
14234 | + evms_cs_unregister_plugin(&plugin_header); | |
14235 | +} | |
14236 | + | |
14237 | +module_init(drivelink_init); /* drivelink_init registers plugin_header with EVMS */ | |
14238 | +module_exit(drivelink_exit); /* drivelink_exit unregisters plugin_header */ | |
14239 | +#ifdef MODULE_LICENSE /* only declare the license when the macro is available */ | |
14240 | +MODULE_LICENSE("GPL"); | |
14241 | +#endif | |
14242 | diff -Naur linux-2002-09-30/drivers/evms/evms_ecr.c evms-2002-09-30/drivers/evms/evms_ecr.c | |
14243 | --- linux-2002-09-30/drivers/evms/evms_ecr.c Wed Dec 31 18:00:00 1969 | |
14244 | +++ evms-2002-09-30/drivers/evms/evms_ecr.c Fri Aug 16 16:19:56 2002 | |
14245 | @@ -0,0 +1,213 @@ | |
14246 | +/* -*- linux-c -*- */ | |
14247 | +/* | |
14248 | + * | |
14249 | + * Copyright (c) International Business Machines Corp., 2000 | |
14250 | + * | |
14251 | + * This program is free software; you can redistribute it and/or modify | |
14252 | + * it under the terms of the GNU General Public License as published by | |
14253 | + * the Free Software Foundation; either version 2 of the License, or | |
14254 | + * (at your option) any later version. | |
14255 | + * | |
14256 | + * This program is distributed in the hope that it will be useful, | |
14257 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14258 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
14259 | + * the GNU General Public License for more details. | |
14260 | + * | |
14261 | + * You should have received a copy of the GNU General Public License | |
14262 | + * along with this program; if not, write to the Free Software | |
14263 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14264 | + */ | |
14265 | + | |
14266 | +/* linux/driver/evms/evms_ecr.c | |
14267 | + * | |
14268 | + * EVMS - Cluster enablement (ECR) module | |
14269 | + * | |
14270 | + */ | |
14271 | + | |
14272 | + | |
14273 | +#include <linux/kernel.h> | |
14274 | +#include <linux/module.h> | |
14275 | +#include <linux/init.h> | |
14276 | +#include <linux/types.h> | |
14277 | +#include <linux/evms/evms.h> | |
14278 | +#include <linux/evms/evms_ecr.h> | |
14279 | + | |
14280 | +#define LOG_PREFIX "ecr: " | |
14281 | + | |
14282 | + | |
14283 | +/* | |
14284 | + * ecr_group_join - placeholder: ignores all of its arguments and returns ECR_FAIL | |
14285 | + */ | |
14286 | +ecr_group_t ecr_group_join(char *group_name, ecr_table_t *f_table, | |
14287 | + ecr_cred_t * cred, size_t size, ecr_instance_t *instance) | |
14288 | +{ | |
14289 | + /* dummy: no join is attempted */ | |
14290 | + return ECR_FAIL; | |
14291 | +} | |
14292 | + | |
14293 | + | |
14294 | + | |
14295 | + | |
14296 | +/* | |
14297 | + * ecr_group_leave - placeholder: ignores its argument and does nothing | |
14298 | + */ | |
14299 | +void ecr_group_leave(ecr_group_t group) | |
14300 | +{ | |
14301 | + /* dummy: nothing to leave */ | |
14302 | + return; | |
14303 | +} | |
14304 | + | |
14305 | + | |
14306 | + | |
14307 | +/* | |
14308 | + * ecr_group_send - placeholder: sends nothing, never invokes callback, returns ECR_FAIL | |
14309 | + */ | |
14310 | +int ecr_group_send(ecr_group_t group, ecr_nodeid_t node, void *message, | |
14311 | + size_t size, ecr_instance_t *instance, | |
14312 | + void callback(int ret, ecr_instance_t *instance)) | |
14313 | +{ | |
14314 | + /* dummy: no message is sent */ | |
14315 | + return ECR_FAIL; | |
14316 | +} | |
14317 | + | |
14318 | + | |
14319 | + | |
14320 | +/* | |
14321 | + * ecr_group_send_wait | |
14322 | + */ | |
14323 | +int ecr_group_send_wait(ecr_group_t group, ecr_nodeid_t node, void *message, | |
14324 | + size_t size, int *ret) | |
14325 | +{ | |
14326 | + /* dummy */ | |
14327 | + *ret = ECR_FAIL; | |
14328 | + return ECR_FAIL; | |
14329 | +} | |
14330 | + | |
14331 | + | |
14332 | + | |
14333 | +/* | |
14334 | + * ecr_group_broadcast | |
14335 | + */ | |
14336 | +int ecr_group_broadcast(ecr_group_t group, void *message, size_t size, | |
14337 | + ecr_instance_t *instance, | |
14338 | + void callback(u_char ret, ecr_instance_t *instance)) | |
14339 | +{ | |
14340 | + /* dummy */ | |
14341 | + return ECR_FAIL; | |
14342 | +} | |
14343 | + | |
14344 | + | |
14345 | + | |
14346 | +/* | |
14347 | + * ecr_group_broadcast_wait | |
14348 | + */ | |
14349 | +int ecr_group_broadcast_wait(ecr_group_t group, void *message, size_t size, | |
14350 | + u_char *ret) | |
14351 | +{ | |
14352 | + /* dummy */ | |
14353 | + *ret = ECR_FAIL; | |
14354 | + return ECR_FAIL; | |
14355 | +} | |
14356 | + | |
14357 | + | |
14358 | + | |
14359 | +/* | |
14360 | + * ecr_group_atomic_execute | |
14361 | + */ | |
14362 | +int ecr_group_atomic_execute(ecr_group_t group, void *message, size_t size, | |
14363 | + ecr_instance_t *instance, | |
14364 | + void callback(ecr_instance_t *instance)) | |
14365 | +{ | |
14366 | + /* dummy */ | |
14367 | + return ECR_FAIL; | |
14368 | +} | |
14369 | + | |
14370 | + | |
14371 | + | |
14372 | +/* | |
14373 | + * ecr_group_atomic_execute_wait | |
14374 | + */ | |
14375 | +int ecr_group_atomic_execute_wait(ecr_group_t group, void *message, size_t size) | |
14376 | +{ | |
14377 | + /* dummy */ | |
14378 | + return ECR_FAIL; | |
14379 | +} | |
14380 | + | |
14381 | + | |
14382 | + | |
14383 | +/* | |
14384 | + * ecr_group_success_response | |
14385 | + */ | |
14386 | +void ecr_group_success_response(ecr_message_t *handle) | |
14387 | +{ | |
14388 | + /* dummy */ | |
14389 | + return; | |
14390 | +} | |
14391 | + | |
14392 | + | |
14393 | + | |
14394 | + | |
14395 | +/* | |
14396 | + * ecr_group_failure_response | |
14397 | + */ | |
14398 | +void ecr_group_failure_response(ecr_message_t *handle, int ret) | |
14399 | +{ | |
14400 | + /* dummy */ | |
14401 | + return; | |
14402 | +} | |
14403 | + | |
14404 | + | |
14405 | + | |
14406 | +/* | |
14407 | + * ecr_lock_create | |
14408 | + */ | |
14409 | +ecr_lock_t ecr_lock_create(char *lockname) | |
14410 | +{ | |
14411 | + /* dummy */ | |
14412 | + return ECR_FAIL; | |
14413 | +} | |
14414 | + | |
14415 | +/* | |
14416 | + * ecr_lock | |
14417 | + */ | |
14418 | +int ecr_lock(ecr_lock_t lock, u64 start, u64 length, | |
14419 | + ecr_lock_mode_t mode, u_char flag) | |
14420 | +{ | |
14421 | + /* dummy */ | |
14422 | + return ECR_FAIL; | |
14423 | +} | |
14424 | + | |
14425 | + | |
14426 | + | |
14427 | +/* | |
14428 | + * ecr_unlock | |
14429 | + */ | |
14430 | +int ecr_unlock(ecr_lock_t lock, u64 start, u64 length) | |
14431 | +{ | |
14432 | + /* dummy */ | |
14433 | + return ECR_FAIL; | |
14434 | +} | |
14435 | + | |
14436 | + | |
14437 | +/********************************************************/ | |
14438 | +/* Required Module Entry Point: */ | |
14439 | +/* ecr_init() */ | |
14440 | +/********************************************************/ | |
14441 | + | |
14442 | +static int __init ecr_init(void) | |
14443 | +{ | |
14444 | + /* dummy */ | |
14445 | + return 0; | |
14446 | +} | |
14447 | + | |
14448 | +static void __exit ecr_exit(void) | |
14449 | +{ | |
14450 | + return; | |
14451 | +} | |
14452 | + | |
14453 | +module_init(ecr_init); | |
14454 | +module_exit(ecr_exit); | |
14455 | +#ifdef MODULE_LICENSE | |
14456 | +MODULE_LICENSE("GPL"); | |
14457 | +#endif | |
14458 | + | |
14459 | diff -Naur linux-2002-09-30/drivers/evms/evms_passthru.c evms-2002-09-30/drivers/evms/evms_passthru.c | |
14460 | --- linux-2002-09-30/drivers/evms/evms_passthru.c Wed Dec 31 18:00:00 1969 | |
14461 | +++ evms-2002-09-30/drivers/evms/evms_passthru.c Fri Sep 13 16:09:55 2002 | |
14462 | @@ -0,0 +1,298 @@ | |
14463 | +/* -*- linux-c -*- */ | |
14464 | + | |
14465 | +/* | |
14466 | + * | |
14467 | + * | |
14468 | + * Copyright (c) International Business Machines Corp., 2000 | |
14469 | + * | |
14470 | + * This program is free software; you can redistribute it and/or modify | |
14471 | + * it under the terms of the GNU General Public License as published by | |
14472 | + * the Free Software Foundation; either version 2 of the License, or | |
14473 | + * (at your option) any later version. | |
14474 | + * | |
14475 | + * This program is distributed in the hope that it will be useful, | |
14476 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14477 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
14478 | + * the GNU General Public License for more details. | |
14479 | + * | |
14480 | + * You should have received a copy of the GNU General Public License | |
14481 | + * along with this program; if not, write to the Free Software | |
14482 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14483 | + * | |
14484 | + * | |
14485 | + */ | |
14486 | +/* | |
14487 | + * linux/drivers/evms/evms_passthru.c | |
14488 | + * | |
14489 | + * EVMS System Data Manager | |
14490 | + * | |
14491 | + * | |
14492 | + */ | |
14493 | + | |
14494 | +#include <linux/module.h> | |
14495 | +#include <linux/kernel.h> | |
14496 | +#include <linux/config.h> | |
14497 | +#include <linux/genhd.h> | |
14498 | +#include <linux/string.h> | |
14499 | +#include <linux/blk.h> | |
14500 | +#include <linux/init.h> | |
14501 | +#include <linux/slab.h> | |
14502 | +#include <linux/evms/evms.h> | |
14503 | +#include <asm/system.h> | |
14504 | + | |
14505 | +#define EVMS_PASSTHRU_ID 0 | |
14506 | +#define LOG_PREFIX "passthru: " | |
14507 | + | |
14508 | +static int passthru_mgr_discover(struct evms_logical_node **); | |
14509 | +static int passthru_mgr_delete(struct evms_logical_node *); | |
14510 | +static void passthru_mgr_read(struct evms_logical_node *, struct buffer_head *); | |
14511 | +static void passthru_mgr_write(struct evms_logical_node *, struct buffer_head *); | |
14512 | +static int passthru_mgr_ioctl(struct evms_logical_node *, | |
14513 | + struct inode *, | |
14514 | + struct file *, unsigned int, unsigned long); | |
14515 | +static int passthru_mgr_init_io(struct evms_logical_node *, | |
14516 | + int, u64, u64, void *); | |
14517 | + | |
14518 | +static struct evms_plugin_fops fops = { | |
14519 | + .discover = passthru_mgr_discover, | |
14520 | + .delete = passthru_mgr_delete, | |
14521 | + .read = passthru_mgr_read, | |
14522 | + .write = passthru_mgr_write, | |
14523 | + .init_io = passthru_mgr_init_io, | |
14524 | + .ioctl = passthru_mgr_ioctl | |
14525 | +}; | |
14526 | + | |
14527 | +static struct evms_plugin_header plugin_header = { | |
14528 | + .id = SetPluginID(IBM_OEM_ID, | |
14529 | + EVMS_FEATURE, | |
14530 | + EVMS_PASSTHRU_ID), | |
14531 | + .version = { | |
14532 | + .major = 1, | |
14533 | + .minor = 1, | |
14534 | + .patchlevel = 1 | |
14535 | + }, | |
14536 | + .required_services_version = { | |
14537 | + .major = 0, | |
14538 | + .minor = 5, | |
14539 | + .patchlevel = 0 | |
14540 | + }, | |
14541 | + .fops = &fops | |
14542 | +}; | |
14543 | + | |
14544 | +/*******************************/ | |
14545 | +/* discovery support functions */ | |
14546 | +/*******************************/ | |
14547 | + | |
14548 | +static int | |
14549 | +process_passthru_data(struct evms_logical_node **pp) | |
14550 | +{ | |
14551 | + int rc, size_in_sectors; | |
14552 | + struct evms_logical_node *node, *new_node; | |
14553 | + | |
14554 | + node = *pp; | |
14555 | + | |
14556 | + size_in_sectors = | |
14557 | + evms_cs_size_in_vsectors(sizeof (struct evms_feature_header)); | |
14558 | + | |
14559 | + /* allocate "parent" node */ | |
14560 | + rc = evms_cs_allocate_logical_node(&new_node); | |
14561 | + if (!rc) { | |
14562 | + /* initialize "parent" node */ | |
14563 | + new_node->private = node; | |
14564 | + new_node->flags = node->flags; | |
14565 | + new_node->plugin = &plugin_header; | |
14566 | + new_node->system_id = node->system_id; | |
14567 | + new_node->block_size = node->block_size; | |
14568 | + new_node->hardsector_size = node->hardsector_size; | |
14569 | + new_node->total_vsectors = node->total_vsectors; | |
14570 | + new_node->total_vsectors -= | |
14571 | + (size_in_sectors << 1) + | |
14572 | + node->feature_header->alignment_padding; | |
14573 | + new_node->volume_info = node->volume_info; | |
14574 | + strcpy(new_node->name, node->name); | |
14575 | + if (strlen(node->feature_header->object_name)) | |
14576 | + strcat(new_node->name, | |
14577 | + node->feature_header->object_name); | |
14578 | + else | |
14579 | + strcat(new_node->name, "_Passthru"); | |
14580 | + | |
14581 | + /* return "parent" node to caller */ | |
14582 | + *pp = new_node; | |
14583 | + | |
14584 | + MOD_INC_USE_COUNT; | |
14585 | + | |
14586 | + LOG_DETAILS("feature header found on '%s', created '%s'.\n", | |
14587 | + node->name, new_node->name); | |
14588 | + /* we're done with the passthru feature headers | |
14589 | + * so let's delete them now. | |
14590 | + */ | |
14591 | + kfree(node->feature_header); | |
14592 | + node->feature_header = NULL; | |
14593 | + } else { | |
14594 | + /* on any fatal error, delete the node */ | |
14595 | + int rc2 = DELETE(node); | |
14596 | + if (rc2) { | |
14597 | + LOG_DEFAULT | |
14598 | + ("error(%d) attempting to delete node(%p,%s).\n", | |
14599 | + rc2, node, node->name); | |
14600 | + } | |
14601 | + } | |
14602 | + return (rc); | |
14603 | +} | |
14604 | + | |
14605 | +/********** Required Plugin Functions **********/ | |
14606 | + | |
14607 | +/* | |
14608 | + * Function: passthru_mgr_discover | |
14609 | + * | |
14610 | + */ | |
14611 | +static int | |
14612 | +passthru_mgr_discover(struct evms_logical_node **discover_list) | |
14613 | +{ | |
14614 | + int rc = 0; | |
14615 | + struct evms_logical_node *node, *tmp_list_head; | |
14616 | + | |
14617 | + MOD_INC_USE_COUNT; | |
14618 | + tmp_list_head = *discover_list; | |
14619 | + *discover_list = NULL; | |
14620 | + | |
14621 | + while (tmp_list_head) { | |
14622 | + node = tmp_list_head; | |
14623 | + rc = evms_cs_remove_logical_node_from_list(&tmp_list_head, | |
14624 | + node); | |
14625 | + if (!rc) | |
14626 | + rc = process_passthru_data(&node); | |
14627 | + if (!rc) | |
14628 | + if (node) | |
14629 | + rc = evms_cs_add_logical_node_to_list | |
14630 | + (discover_list, node); | |
14631 | + } | |
14632 | + MOD_DEC_USE_COUNT; | |
14633 | + return (rc); | |
14634 | +} | |
14635 | + | |
14636 | +/* | |
14637 | + * Function: passthru_mgr_delete | |
14638 | + * | |
14639 | + */ | |
14640 | +static int | |
14641 | +passthru_mgr_delete(struct evms_logical_node *node) | |
14642 | +{ | |
14643 | + int rc; | |
14644 | + struct evms_logical_node *p; | |
14645 | + | |
14646 | + LOG_DETAILS("deleting '%s'.\n", node->name); | |
14647 | + | |
14648 | + p = node->private; | |
14649 | + rc = DELETE(p); | |
14650 | + if (!rc) { | |
14651 | + evms_cs_deallocate_logical_node(node); | |
14652 | + MOD_DEC_USE_COUNT; | |
14653 | + } | |
14654 | + return (rc); | |
14655 | +} | |
14656 | + | |
14657 | +/* | |
14658 | + * function: passthru_io_error | |
14659 | + * | |
14660 | + * this function was primarily created because the function | |
14661 | + * buffer_IO_error is inline and kgdb doesn't allow breakpoints | |
14662 | + * to be set on inline functions. Since this was an error path | |
14663 | + * and not mainline, I decided to add a trace statement to help | |
14664 | + * report on the failing condition. | |
14665 | + * | |
14666 | + */ | |
14667 | +static void | |
14668 | +passthru_io_error(struct evms_logical_node *node, int io_flag, struct buffer_head *bh) | |
14669 | +{ | |
14670 | + LOG_SERIOUS | |
14671 | + ("attempt to %s beyond boundary("PFU64") on (%s), rsector("PFU64").\n", | |
14672 | + (io_flag) ? "WRITE" : "READ", node->total_vsectors - 1, | |
14673 | + node->name, (u64) bh->b_rsector); | |
14674 | + | |
14675 | + bh->b_end_io(bh, 0); | |
14676 | +} | |
14677 | + | |
14678 | +/* | |
14679 | + * Function: passthru_mgr_read | |
14680 | + */ | |
14681 | +static void | |
14682 | +passthru_mgr_read(struct evms_logical_node *node, struct buffer_head *bh) | |
14683 | +{ | |
14684 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <= | |
14685 | + node->total_vsectors) { | |
14686 | + R_IO(((struct evms_logical_node *) (node->private)), bh); | |
14687 | + } else | |
14688 | + passthru_io_error(node, READ, bh); | |
14689 | +} | |
14690 | + | |
14691 | +/* | |
14692 | + * Function: passthru_mgr_write | |
14693 | + * | |
14694 | + */ | |
14695 | +static void | |
14696 | +passthru_mgr_write(struct evms_logical_node *node, struct buffer_head *bh) | |
14697 | +{ | |
14698 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <= | |
14699 | + node->total_vsectors) { | |
14700 | + W_IO(((struct evms_logical_node *) (node->private)), bh); | |
14701 | + } else | |
14702 | + passthru_io_error(node, WRITE, bh); | |
14703 | +} | |
14704 | + | |
14705 | +/* | |
14706 | + * Function: passthru_mgr_ioctl | |
14707 | + * | |
14708 | + */ | |
14709 | +static int | |
14710 | +passthru_mgr_ioctl(struct evms_logical_node *node, | |
14711 | + struct inode *inode, | |
14712 | + struct file *file, unsigned int cmd, unsigned long arg) | |
14713 | +{ | |
14714 | + int rc; | |
14715 | + | |
14716 | + if ((!node) || (!inode)) | |
14717 | + rc = -EINVAL; | |
14718 | + else | |
14719 | + rc = IOCTL(((struct evms_logical_node *) (node->private)), | |
14720 | + inode, file, cmd, arg); | |
14721 | + return (rc); | |
14722 | +} | |
14723 | + | |
14724 | +static int | |
14725 | +passthru_mgr_init_io(struct evms_logical_node *node, int io_flag, /* 0=read, 1=write */ | |
14726 | + u64 sect_nr, /* disk LBA */ | |
14727 | + u64 num_sects, /* # of sectors */ | |
14728 | + void *buf_addr) | |
14729 | +{ /* buffer address */ | |
14730 | + int rc; | |
14731 | + if ((sect_nr + num_sects) <= node->total_vsectors) { | |
14732 | + rc = INIT_IO(((struct evms_logical_node *) (node-> | |
14733 | + private)), | |
14734 | + io_flag, sect_nr, num_sects, buf_addr); | |
14735 | + } else | |
14736 | + rc = -EINVAL; | |
14737 | + return (rc); | |
14738 | +} | |
14739 | + | |
14740 | +/* | |
14741 | + * Function: evms_passthru_manager_init | |
14742 | + * | |
14743 | + */ | |
14744 | +int __init | |
14745 | +evms_passthru_manager_init(void) | |
14746 | +{ | |
14747 | + return evms_cs_register_plugin(&plugin_header); /* register with EVMS */ | |
14748 | +} | |
14749 | + | |
14750 | +void __exit | |
14751 | +evms_passthru_manager_exit(void) | |
14752 | +{ | |
14753 | + evms_cs_unregister_plugin(&plugin_header); | |
14754 | +} | |
14755 | + | |
14756 | +module_init(evms_passthru_manager_init); | |
14757 | +module_exit(evms_passthru_manager_exit); | |
14758 | +#ifdef MODULE_LICENSE | |
14759 | +MODULE_LICENSE("GPL"); | |
14760 | +#endif | |
14761 | diff -Naur linux-2002-09-30/drivers/evms/gpt_part.c evms-2002-09-30/drivers/evms/gpt_part.c | |
14762 | --- linux-2002-09-30/drivers/evms/gpt_part.c Wed Dec 31 18:00:00 1969 | |
14763 | +++ evms-2002-09-30/drivers/evms/gpt_part.c Fri Sep 13 16:09:55 2002 | |
14764 | @@ -0,0 +1,1018 @@ | |
14765 | +/* -*- linux-c -*- */ | |
14766 | +/* | |
14767 | + * | |
14768 | + * | |
14769 | + * Copyright (c) International Business Machines Corp., 2000 | |
14770 | + * | |
14771 | + * This program is free software; you can redistribute it and/or modify | |
14772 | + * it under the terms of the GNU General Public License as published by | |
14773 | + * the Free Software Foundation; either version 2 of the License, or | |
14774 | + * (at your option) any later version. | |
14775 | + * | |
14776 | + * This program is distributed in the hope that it will be useful, | |
14777 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14778 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
14779 | + * the GNU General Public License for more details. | |
14780 | + * | |
14781 | + * You should have received a copy of the GNU General Public License | |
14782 | + * along with this program; if not, write to the Free Software | |
14783 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14784 | + * | |
14785 | + * | |
14786 | + */ | |
14787 | + | |
14788 | +/* linux/drivers/evms/gpt_part.c | |
14789 | + * | |
14790 | + * EVMS - EFI GPT segment manager plugin | |
14791 | + * | |
14792 | + * This plugin provides support for the GUID Partition Table format specified | |
14793 | + * by the Extensible Firmware Interface documentation ... version 1.02 | |
14794 | + */ | |
14795 | + | |
14796 | +#include <linux/config.h> | |
14797 | +#include <linux/module.h> | |
14798 | +#include <linux/kernel.h> | |
14799 | +#include <linux/config.h> | |
14800 | +#include <linux/string.h> | |
14801 | +#include <linux/blk.h> | |
14802 | +#include <asm/uaccess.h> | |
14803 | +#include <linux/evms/evms.h> | |
14804 | + | |
14805 | +/* prefix used in logging messages */ | |
14806 | +#define LOG_PREFIX "gpt_part: " | |
14807 | + | |
14808 | +/** | |
14809 | + * struct gpt_private - Private data structure for this plugin | |
14810 | + * @source_disk: object this IO will get remapped to | |
14811 | + * @start_sect: source object relative starting address in 512 byte units | |
14812 | + * @nr_sects: partition size in 512 byte units | |
14813 | + * @type: partition type or filesystem format indicator | |
14814 | + * | |
14815 | + * private copy of just the fields we require to remap IO requests | |
14816 | + * to the underlying object. | |
14817 | + **/ | |
14818 | +struct gpt_private { | |
14819 | + struct evms_logical_node *source_disk; | |
14820 | + u64 start_sect; | |
14821 | + u64 nr_sects; | |
14822 | + unsigned char type; | |
14823 | +}; | |
14824 | + | |
14825 | +#define GPT_DISKMAGIC 0x5452415020494645 // "EFI PART" | |
14826 | +#define GPT_PNAME_SIZE 36 // max unicode partition name size | |
14827 | + | |
14828 | +/** | |
14829 | + * struct guid - GUID structure | |
14830 | + * @time_low: timestamp - low order 32 bits | |
14831 | + * @time_mid: timestamp - mid 16 bits | |
14832 | + * @time_high: timestamp - high 16 bits | |
14833 | + * @clock_seq_high: clock - high order 8 bits | |
14834 | + * @clock_seq_low: clock - low order 8 bits | |
14835 | + * @node: spatial reference - unique id (ie. mac address of nic) | |
14836 | + * | |
14837 | + * GUID structure | |
14838 | + **/ | |
14839 | +struct guid { | |
14840 | + u32 time_low; | |
14841 | + u16 time_mid; | |
14842 | + u16 time_high; | |
14843 | + u8 clock_seq_high; | |
14844 | + u8 clock_seq_low; | |
14845 | + u8 node[6]; | |
14846 | +}; | |
14847 | + | |
14848 | +/** | |
14849 | + * struct gpt_partition - GPT partition record definition | |
14850 | + * @type: partition type | |
14851 | + * @part_id: partition record id | |
14852 | + * @start: address of 1st block of partition | |
14853 | + * @end: address of last block of partition | |
14854 | + * @attributes: bit field reserved by EFI spec | |
14855 | + * @name: unicode name of partition | |
14856 | + * | |
14857 | + * GPT partition record definition | |
14858 | + **/ | |
14859 | +struct gpt_partition { | |
14860 | + struct guid type; | |
14861 | + struct guid part_id; | |
14862 | + u64 start; | |
14863 | + u64 end; | |
14864 | + u64 attributes; | |
14865 | + u16 name[GPT_PNAME_SIZE]; | |
14866 | +}; | |
14867 | + | |
14868 | +/** | |
14869 | + * struct gpt_header - GPT header | |
14870 | + * @signature: EFI compatible header signature | |
14871 | + * @version: spec revision number | |
14872 | + * @size: size (bytes) of gpt header | |
14873 | + * @crc: crc of gpt header | |
14874 | + * @reserve: reserved by spec ... must be zero | |
14875 | + * @my_lba: lba of gpt header | |
14876 | + * @alternate_lba: lba of 2nd copy of gpt header | |
14877 | + * @start_useable: lba of 1st block of useable area on disk | |
14878 | + * @end_useable: lba of last block of useable area on disk | |
14879 | + * @disk_id: GUID - identifies this disk | |
14880 | + * @ptable_lba: lba of partition table | |
14881 | + * @ptable_count: number of entries in the partition table | |
14882 | + * @ptable_entry_size: size of partition table entry | |
14883 | + * @ptable_crc: crc of partition table | |
14884 | + * | |
14885 | + * GPT header | |
14886 | + **/ | |
14887 | +struct gpt_header { | |
14888 | + u64 signature; | |
14889 | + u32 version; | |
14890 | + u32 size; | |
14891 | + u32 crc; | |
14892 | + u32 reserve; | |
14893 | + u64 my_lba; | |
14894 | + u64 alternate_lba; | |
14895 | + u64 start_useable; | |
14896 | + u64 end_useable; | |
14897 | + struct guid disk_id; | |
14898 | + u64 ptable_lba; | |
14899 | + u32 ptable_count; | |
14900 | + u32 ptable_entry_size; | |
14901 | + u32 ptable_crc; | |
14902 | +}; | |
14903 | + | |
14904 | +struct guid EFI_SYSTEM_PARTITION = { | |
14905 | + 0xC12A7328, | |
14906 | + 0xF81F, | |
14907 | + 0x11D2, | |
14908 | + 0xBA, | |
14909 | + 0x4B, | |
14910 | + {0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B} | |
14911 | +}; | |
14912 | + | |
14913 | +struct guid BASIC_DATA_PARTITION = { | |
14914 | + 0xEBD0A0A2, | |
14915 | + 0xB9E5, | |
14916 | + 0x4433, | |
14917 | + 0x87, | |
14918 | + 0xC0, | |
14919 | + {0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7} | |
14920 | +}; | |
14921 | + | |
14922 | +struct guid LEGACY_MBR_PARTITION = { | |
14923 | + 0x024DEE41, | |
14924 | + 0x33E7, | |
14925 | + 0x11D3, | |
14926 | + 0x9D, | |
14927 | + 0x69, | |
14928 | + {0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F} | |
14929 | +}; | |
14930 | + | |
14931 | +struct guid GPT_SWAP_PARTITION = { | |
14932 | + 0x0657FD6D, | |
14933 | + 0xA4AB, | |
14934 | + 0x43C4, | |
14935 | + 0x84, | |
14936 | + 0xE5, | |
14937 | + {0x09, 0x33, 0xC8, 0x4B, 0x4F, 0x4F} | |
14938 | +}; | |
14939 | + | |
14940 | +struct guid UNUSED_GPT_PARTITION = { | |
14941 | + 0, 0, 0, 0, 0, | |
14942 | + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00} | |
14943 | +}; | |
14944 | + | |
14945 | +static int exported_nodes; /* total # of exported segments | |
14946 | + * produced during this discovery. | |
14947 | + */ | |
14948 | + | |
14949 | +/* Prototypes */ | |
14950 | +static int partition_discover(struct evms_logical_node **); | |
14951 | +static int partition_delete(struct evms_logical_node *); | |
14952 | +static void partition_read(struct evms_logical_node *, struct buffer_head *); | |
14953 | +static void partition_write(struct evms_logical_node *, struct buffer_head *); | |
14954 | +static int partition_ioctl(struct evms_logical_node *, | |
14955 | + struct inode *, | |
14956 | + struct file *, unsigned int, unsigned long); | |
14957 | +static int partition_init_io(struct evms_logical_node *, | |
14958 | + int, u64, u64, void *); | |
14959 | + | |
14960 | +static struct evms_plugin_fops fops = { | |
14961 | + .discover = partition_discover, | |
14962 | + .delete = partition_delete, | |
14963 | + .read = partition_read, | |
14964 | + .write = partition_write, | |
14965 | + .init_io = partition_init_io, | |
14966 | + .ioctl = partition_ioctl | |
14967 | +}; | |
14968 | + | |
14969 | +#define EVMS_GPT_PARTITION_MANAGER_ID 3 | |
14970 | + | |
14971 | +static struct evms_plugin_header plugin_header = { | |
14972 | + .id = SetPluginID(IBM_OEM_ID, | |
14973 | + EVMS_SEGMENT_MANAGER, | |
14974 | + EVMS_GPT_PARTITION_MANAGER_ID), | |
14975 | + .version = { | |
14976 | + .major = 1, | |
14977 | + .minor = 1, | |
14978 | + .patchlevel = 1 | |
14979 | + }, | |
14980 | + .required_services_version = { | |
14981 | + .major = 0, | |
14982 | + .minor = 5, | |
14983 | + .patchlevel = 0 | |
14984 | + }, | |
14985 | + .fops = &fops | |
14986 | +}; | |
14987 | + | |
14988 | +/***************************************************/ | |
14989 | +/* List Support - Typedefs, Variables, & Functions */ | |
14990 | +/***************************************************/ | |
14991 | + | |
14992 | +/* Typedefs */ | |
14993 | + | |
14994 | +struct segment_list_node { | |
14995 | + struct evms_logical_node *segment; | |
14996 | + struct segment_list_node *next; | |
14997 | +}; | |
14998 | + | |
14999 | +struct disk_list_node { | |
15000 | + struct evms_logical_node *disk; | |
15001 | + struct segment_list_node *segment_list; | |
15002 | + struct disk_list_node *next; | |
15003 | +}; | |
15004 | + | |
15005 | +/* Variables */ | |
15006 | + | |
15007 | +static struct disk_list_node *my_disk_list; | |
15008 | + | |
15009 | +/* Functions */ | |
15010 | + | |
15011 | +/* | |
15012 | + * Function: Convert a GPT header from disk format to the arch specific | |
15013 | + * format. | |
15014 | + */ | |
15015 | +static void | |
15016 | +disk_gpt_header_to_cpu(struct gpt_header *gh) | |
15017 | +{ | |
15018 | + gh->signature = le64_to_cpu(gh->signature); | |
15019 | + gh->version = le32_to_cpu(gh->version); | |
15020 | + gh->size = le32_to_cpu(gh->size); | |
15021 | + gh->crc = le32_to_cpu(gh->crc); | |
15022 | + gh->reserve = le32_to_cpu(gh->reserve); | |
15023 | + gh->my_lba = le64_to_cpu(gh->my_lba); | |
15024 | + gh->alternate_lba = le64_to_cpu(gh->alternate_lba); | |
15025 | + gh->start_useable = le64_to_cpu(gh->start_useable); | |
15026 | + gh->end_useable = le64_to_cpu(gh->end_useable); | |
15027 | + gh->disk_id.time_low = le32_to_cpu(gh->disk_id.time_low); | |
15028 | + gh->disk_id.time_mid = le16_to_cpu(gh->disk_id.time_mid); | |
15029 | + gh->disk_id.time_high = le16_to_cpu(gh->disk_id.time_high); | |
15030 | + gh->ptable_lba = le64_to_cpu(gh->ptable_lba); | |
15031 | + gh->ptable_count = le32_to_cpu(gh->ptable_count); | |
15032 | + gh->ptable_entry_size = le32_to_cpu(gh->ptable_entry_size); | |
15033 | + gh->ptable_crc = le32_to_cpu(gh->ptable_crc); | |
15034 | +} | |
15035 | + | |
15036 | +static int | |
15037 | +matching_guids(struct guid *g1, struct guid *g2) | |
15038 | +{ | |
15039 | + /* g1's multi-byte fields are converted from little-endian; g2 is used as-is. */ | |
15040 | + /* node[] is a plain byte array, so it is compared without any byte swapping. */ | |
15041 | + return ((le32_to_cpu(g1->time_low) == g2->time_low) && | |
15042 | + (le16_to_cpu(g1->time_mid) == g2->time_mid) && | |
15043 | + (le16_to_cpu(g1->time_high) == g2->time_high) && | |
15044 | + (g1->clock_seq_high == g2->clock_seq_high) && | |
15045 | + (g1->clock_seq_low == g2->clock_seq_low) && | |
15046 | + (memcmp(g1->node, g2->node, sizeof (g1->node)) == 0)); | |
15047 | +} | |
15048 | +static inline int | |
15049 | +isa_basic_data_gpt_partition_record(struct gpt_partition *p) | |
15050 | +{ | |
15051 | + return (matching_guids(&p->type, &BASIC_DATA_PARTITION)); | |
15052 | +} | |
15053 | +static inline int | |
15054 | +isa_legacy_mbr_gpt_partition_record(struct gpt_partition *p) | |
15055 | +{ | |
15056 | + return (matching_guids(&p->type, &LEGACY_MBR_PARTITION)); | |
15057 | +} | |
15058 | +static inline int | |
15059 | +isa_esp_gpt_partition_record(struct gpt_partition *p) | |
15060 | +{ | |
15061 | + return (matching_guids(&p->type, &EFI_SYSTEM_PARTITION)); | |
15062 | +} | |
15063 | +static inline int | |
15064 | +isa_gpt_swap_partition_record(struct gpt_partition *p) | |
15065 | +{ | |
15066 | + return (matching_guids(&p->type, &GPT_SWAP_PARTITION)); | |
15067 | +} | |
15068 | +static inline int | |
15069 | +isa_unused_gpt_partition_record(struct gpt_partition *p) | |
15070 | +{ | |
15071 | + return (matching_guids(&p->type, &UNUSED_GPT_PARTITION)); | |
15072 | +} | |
15073 | + | |
15074 | +static struct disk_list_node ** | |
15075 | +lookup_disk(struct evms_logical_node *disk) | |
15076 | +{ | |
15077 | + struct disk_list_node **ldln; | |
15078 | + | |
15079 | + ldln = &my_disk_list; | |
15080 | + while (*ldln) { | |
15081 | + if ((*ldln)->disk == disk) | |
15082 | + break; | |
15083 | + ldln = &(*ldln)->next; | |
15084 | + } | |
15085 | + return (ldln); | |
15086 | +} | |
15087 | + | |
15088 | +static struct segment_list_node ** | |
15089 | +lookup_segment(struct disk_list_node *disk, struct evms_logical_node *segment) | |
15090 | +{ | |
15091 | + struct segment_list_node **lsln; | |
15092 | + | |
15093 | + lsln = &disk->segment_list; | |
15094 | + while (*lsln) { | |
15095 | + if ((*lsln)->segment == segment) | |
15096 | + break; | |
15097 | + lsln = &(*lsln)->next; | |
15098 | + } | |
15099 | + return (lsln); | |
15100 | +} | |
15101 | + | |
15102 | +static struct evms_logical_node * | |
15103 | +find_segment_on_disk(struct evms_logical_node *disk, | |
15104 | + u64 start_sect, u64 nr_sects) | |
15105 | +{ | |
15106 | + struct evms_logical_node *rc = NULL; | |
15107 | + struct disk_list_node **ldln; | |
15108 | + struct segment_list_node **lsln; | |
15109 | + struct gpt_private *gpt_prv; | |
15110 | + | |
15111 | + ldln = lookup_disk(disk); | |
15112 | + if (*ldln) { | |
15113 | + /* disk found in list */ | |
15114 | + /* attempt to find segment */ | |
15115 | + | |
15116 | + lsln = &(*ldln)->segment_list; | |
15117 | + while (*lsln) { | |
15118 | + gpt_prv = (*lsln)->segment->private; | |
15119 | + if (gpt_prv->start_sect == start_sect) | |
15120 | + if (gpt_prv->nr_sects == nr_sects) | |
15121 | + break; | |
15122 | + lsln = &(*lsln)->next; | |
15123 | + } | |
15124 | + if (*lsln) | |
15125 | + rc = (*lsln)->segment; | |
15126 | + } | |
15127 | + return (rc); | |
15128 | +} | |
15129 | + | |
15130 | +/* function description: add_segment_to_disk | |
15131 | + * | |
15132 | + * this function attempts to add a segment to the segment | |
15133 | + * list of a disk. if the specified disk is not found, it | |
15134 | + * will be added to the global disk list. this function | |
15135 | + * returns an integer status code, not a segment pointer: | |
15136 | + * | |
15137 | + * 0 - the specified segment was added to the | |
15138 | + * disk's segment list. | |
15139 | + * -1 - the specified segment was already present | |
15140 | + * in the disk's segment list, so nothing | |
15141 | + * was added. | |
15142 | + * -ENOMEM - a disk list node or a segment list node | |
15143 | + * could not be allocated. | |
15144 | + */ | |
15145 | +static int | |
15146 | +add_segment_to_disk(struct evms_logical_node *disk, | |
15147 | + struct evms_logical_node *segment) | |
15148 | +{ | |
15149 | + int rc = 0; | |
15150 | + struct disk_list_node **ldln, *new_disk; | |
15151 | + struct segment_list_node **lsln, *new_segment; | |
15152 | + | |
15153 | + ldln = lookup_disk(disk); | |
15154 | + if (*ldln == NULL) { | |
15155 | + /* disk not in list, add disk */ | |
15156 | + new_disk = kmalloc(sizeof (*new_disk), GFP_KERNEL); | |
15157 | + if (new_disk) { | |
15158 | + memset(new_disk, 0, sizeof (*new_disk)); | |
15159 | + new_disk->disk = disk; | |
15160 | + *ldln = new_disk; | |
15161 | + } else { | |
15162 | + rc = -ENOMEM; | |
15163 | + } | |
15164 | + } | |
15165 | + if (!rc) { | |
15166 | + /* attempt to add segment */ | |
15167 | + lsln = lookup_segment(*ldln, segment); | |
15168 | + if (*lsln == NULL) { | |
15169 | + /* segment not in list, add segment */ | |
15170 | + new_segment = | |
15171 | + kmalloc(sizeof (*new_segment), GFP_KERNEL); | |
15172 | + if (new_segment) { | |
15173 | + memset(new_segment, 0, sizeof (*new_segment)); | |
15174 | + new_segment->segment = segment; | |
15175 | + *lsln = new_segment; | |
15176 | + } else { | |
15177 | + rc = -ENOMEM; | |
15178 | + } | |
15179 | + } else | |
15180 | + rc = -1; | |
15181 | + } | |
15182 | + return (rc); | |
15183 | +} | |
15184 | + | |
15185 | +static int | |
15186 | +remove_segment_from_disk(struct evms_logical_node *disk, | |
15187 | + struct evms_logical_node *segment, | |
15188 | + struct evms_logical_node **empty_disk) | |
15189 | +{ | |
15190 | + struct disk_list_node **disk_link, *dead_disk; | |
15191 | + struct segment_list_node **seg_link, *dead_seg; | |
15192 | + | |
15193 | + /* 0 on success, -1 if the disk is not tracked, -2 if the segment | |
15194 | + * is not on that disk's list; *empty_disk is set only when the | |
15195 | + * removal emptied the disk's segment list (NULL otherwise). | |
15196 | + */ | |
15197 | + *empty_disk = NULL; | |
15198 | + | |
15199 | + disk_link = lookup_disk(disk); | |
15200 | + if (*disk_link == NULL) | |
15201 | + return -1; | |
15202 | + | |
15203 | + /* locate the segment on this disk's list */ | |
15204 | + seg_link = lookup_segment(*disk_link, segment); | |
15205 | + if (*seg_link == NULL) | |
15206 | + return -2; | |
15207 | + | |
15208 | + /* unlink the segment node, then release it */ | |
15209 | + dead_seg = *seg_link; | |
15210 | + *seg_link = dead_seg->next; | |
15211 | + kfree(dead_seg); | |
15212 | + | |
15213 | + /* that was the last segment: retire the disk node as well */ | |
15214 | + if ((*disk_link)->segment_list == NULL) { | |
15215 | + dead_disk = *disk_link; | |
15216 | + *empty_disk = dead_disk->disk; | |
15217 | + *disk_link = dead_disk->next; | |
15218 | + kfree(dead_disk); | |
15219 | + } | |
15220 | + | |
15221 | + return 0; | |
15222 | +} | |
15223 | + | |
15224 | +/* | |
15225 | + * Function: process_segment - export one partition of 'node' as a segment node | |
15226 | + */ | |
15227 | +static int | |
15228 | +process_segment(struct evms_logical_node **discover_list, | |
15229 | + struct evms_logical_node *node, | |
15230 | + u64 start_sect, | |
15231 | + u64 nr_sects, | |
15232 | + int type, int part_num, int evms_top_segment) | |
15233 | +{ | |
15234 | + struct gpt_private *gpt_prv = NULL; | |
15235 | + struct evms_logical_node *segment; | |
15236 | + int rc = 0; | |
15237 | + /* returns 0 once a segment is on discover_list, else the allocation error */ | |
15238 | + segment = find_segment_on_disk(node, start_sect, nr_sects); | |
15239 | + if (segment) { /* already known for this range: re-export the existing node */ | |
15240 | + LOG_DETAILS("exporting segment '%s'.\n", segment->name); | |
15241 | + } else { | |
15242 | + gpt_prv = kmalloc(sizeof (*gpt_prv), GFP_KERNEL); | |
15243 | + if (gpt_prv) { | |
15244 | + gpt_prv->source_disk = node; | |
15245 | + gpt_prv->start_sect = start_sect; | |
15246 | + gpt_prv->nr_sects = nr_sects; | |
15247 | + gpt_prv->type = type; | |
15248 | + rc = evms_cs_allocate_logical_node(&segment); | |
15249 | + } else { | |
15250 | + rc = -ENOMEM; | |
15251 | + } | |
15252 | + if (!rc) { | |
15253 | + segment->plugin = &plugin_header; | |
15254 | + segment->system_id = (unsigned int) type; | |
15255 | + segment->total_vsectors = nr_sects; | |
15256 | + segment->block_size = node->block_size; | |
15257 | + segment->hardsector_size = node->hardsector_size; | |
15258 | + segment->private = gpt_prv; | |
15259 | + segment->flags = node->flags; | |
15260 | + if (evms_top_segment) | |
15261 | + segment->iflags |= EVMS_TOP_SEGMENT; | |
15262 | + strcpy(segment->name, node->name); /* NOTE(review): name is built with unbounded strcpy/strcat/sprintf - confirm it fits */ | |
15263 | + if (GetPluginType(node->plugin->id) == | |
15264 | + EVMS_SEGMENT_MANAGER) { | |
15265 | + strcat(segment->name, "."); /* parent is itself a segment: separate the numbers with a dot */ | |
15266 | + } | |
15267 | + sprintf(segment->name + strlen(segment->name), "%d", | |
15268 | + part_num); | |
15269 | + LOG_DETAILS("creating segment '%s'.\n", segment->name); | |
15270 | + rc = add_segment_to_disk(node, segment); | |
15271 | + if (rc) { | |
15272 | + LOG_ERROR | |
15273 | + ("%s: error(%d) adding segment '%s'!\n", | |
15274 | + __FUNCTION__, rc, segment->name); | |
15275 | + rc = 0; /* the error is only logged; the segment is still exported below */ | |
15276 | + } else { | |
15277 | + MOD_INC_USE_COUNT; /* partition_delete() does the matching MOD_DEC_USE_COUNT */ | |
15278 | + } | |
15279 | + } | |
15280 | + if (rc) { /* an allocation failed: release whatever was obtained */ | |
15281 | + if (gpt_prv) | |
15282 | + kfree(gpt_prv); | |
15283 | + if (segment) | |
15284 | + evms_cs_deallocate_logical_node(segment); | |
15285 | + } | |
15286 | + } | |
15287 | + if (!rc) { /* reached for both reused and newly created segments */ | |
15288 | + evms_cs_add_logical_node_to_list(discover_list, segment); | |
15289 | + exported_nodes++; | |
15290 | + } | |
15291 | + return rc; | |
15292 | +} | |
15293 | + | |
15294 | +void | |
15295 | +print_mem(void *buffer, int length) | |
15296 | +{ | |
15297 | + /* debug aid: hex-dump 'length' bytes at 'buffer' via printk, 16 per row */ | |
15298 | + unsigned char *bufptr = (unsigned char *) buffer; | |
15299 | + int i; | |
15300 | + | |
15301 | + /* a for loop (rather than a test at the bottom) so that a zero or | |
15302 | + * negative length prints nothing instead of reading bufptr[0] | |
15303 | + */ | |
15304 | + for (i = 0; i < length; i++) { | |
15305 | + if ((i % 16) == 0) | |
15306 | + printk(KERN_INFO "\n0x%p->", bufptr + i); | |
15307 | + printk(KERN_INFO "%02x ", bufptr[i]); | |
15308 | + } | |
15309 | + printk(KERN_INFO "\n"); | |
15310 | +} | |
15311 | + | |
15312 | +/* | |
15313 | + * Function: get GPT Partition Table - reads the partition record | |
15314 | + * array described by header 'gh' from 'node' and checks its crc. | |
15315 | + * The 'gh' fields are used as they are, with no byte order conversion. | |
15316 | + * | |
15317 | + * Returns a kmalloc'ed buffer, or NULL if the allocation, the read | |
15318 | + * or the crc comparison fails. | |
15319 | + */ | |
15320 | +static struct gpt_partition * | |
15321 | +get_gpt_partition_table(struct evms_logical_node *node, struct gpt_header *gh) | |
15322 | +{ | |
15323 | + struct gpt_partition *table; | |
15324 | + u32 nr_vsectors, table_crc; | |
15325 | + | |
15326 | + /* size of the whole record array, expressed in vsectors */ | |
15327 | + nr_vsectors = | |
15328 | + evms_cs_size_in_vsectors(gh->ptable_count * gh->ptable_entry_size); | |
15329 | + | |
15330 | + table = kmalloc(nr_vsectors * EVMS_VSECTOR_SIZE, GFP_KERNEL); | |
15331 | + if (!table) | |
15332 | + return NULL; | |
15333 | + | |
15334 | + /* io_flag 0 requests a read, starting at the lba named in the header */ | |
15335 | + if (INIT_IO(node, 0, gh->ptable_lba, nr_vsectors, table)) | |
15336 | + goto discard; | |
15337 | + | |
15338 | + /* the crc spans the record bytes only, not the whole buffer */ | |
15339 | + table_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, | |
15340 | + table, | |
15341 | + gh->ptable_count * | |
15342 | + gh->ptable_entry_size); | |
15343 | + | |
15344 | + /* the complemented result is what gets compared with the header */ | |
15345 | + if (~table_crc != gh->ptable_crc) | |
15346 | + goto discard; | |
15347 | + | |
15348 | + return table; | |
15349 | + | |
15350 | +discard: | |
15351 | + /* read error or crc mismatch: the buffer is of no use to the | |
15352 | + * caller, so release it and report failure | |
15353 | + */ | |
15354 | + kfree(table); | |
15355 | + return NULL; | |
15356 | +} | |
15357 | + | |
15358 | +/* | |
15359 | + * Function: Validate GPT Header - runs basic checks to sanity check a | |
15360 | + * gpt header that is still in on-disk (little endian) byte order and | |
15361 | + * was read from sector 'lsn' of 'node'. Returns 1 if valid, else 0. | |
15362 | + */ | |
15363 | +static int | |
15364 | +isa_valid_gpt_header(struct evms_logical_node *node, u64 lsn, | |
15365 | + struct gpt_header *gh) | |
15366 | +{ | |
15367 | + u32 crc, calculated_crc, header_size; | |
15368 | + u64 sector_count; | |
15369 | + | |
15370 | + /* signature */ | |
15371 | + if (le64_to_cpu(gh->signature) != GPT_DISKMAGIC) | |
15372 | + return 0; | |
15373 | + /* size comes from the disk and is the crc length below; get_gpt_header() | |
15374 | + * reads headers into a one-vsector buffer, so reject anything larger */ | |
15375 | + header_size = le32_to_cpu(gh->size); | |
15376 | + if (header_size > EVMS_VSECTOR_SIZE) | |
15377 | + return 0; | |
15378 | + | |
15379 | + /* crc is computed with the crc field zeroed, then the field is restored */ | |
15380 | + crc = le32_to_cpu(gh->crc); | |
15381 | + gh->crc = 0; | |
15382 | + calculated_crc = | |
15383 | + ~(evms_cs_calculate_crc(EVMS_INITIAL_CRC, gh, header_size)); | |
15384 | + gh->crc = cpu_to_le32(crc); | |
15385 | + if (calculated_crc != crc) | |
15386 | + return 0; | |
15387 | + | |
15388 | + /* spec says lba reported by header must match actual location on disk */ | |
15389 | + if (lsn != le64_to_cpu(gh->my_lba)) | |
15390 | + return 0; | |
15391 | + | |
15392 | + /* sanity check partition table info found in header */ | |
15393 | + if (gh->ptable_count == 0 || gh->ptable_entry_size == 0) | |
15394 | + return 0; | |
15395 | + | |
15396 | + sector_count = /* NOTE(review): le64_to_cpu assumes 64-bit fields - confirm against struct gpt_header */ | |
15397 | + evms_cs_size_in_vsectors(le64_to_cpu(gh->ptable_count) * | |
15398 | + le64_to_cpu(gh->ptable_entry_size)); | |
15399 | + if ((le64_to_cpu(gh->ptable_lba) + sector_count - 1) >= | |
15400 | + node->total_vsectors - 1) | |
15401 | + return 0; | |
15402 | + return 1; } | |
15403 | + | |
15404 | +/* | |
15405 | + * Function: get GPT Partition Table Header | |
15406 | + * | |
15407 | + * Reads the single vsector at 'lsn' of 'node' and validates it with | |
15408 | + * isa_valid_gpt_header(). Returns a kmalloc'ed header that has been | |
15409 | + * passed through disk_gpt_header_to_cpu(), or NULL if the allocation, | |
15410 | + * the read or the validation fails. | |
15411 | + */ | |
15412 | +static struct gpt_header * | |
15413 | +get_gpt_header(struct evms_logical_node *node, u64 lsn) | |
15414 | +{ | |
15415 | + struct gpt_header *hdr; | |
15416 | + | |
15417 | + hdr = kmalloc(EVMS_VSECTOR_SIZE, GFP_KERNEL); | |
15418 | + if (hdr == NULL) | |
15419 | + return NULL; | |
15420 | + | |
15421 | + /* io_flag 0 = read; the header is vetted only if the read worked */ | |
15422 | + if (INIT_IO(node, 0, lsn, 1, hdr) != 0 || | |
15423 | + !isa_valid_gpt_header(node, lsn, hdr)) { | |
15424 | + kfree(hdr); | |
15425 | + return NULL; | |
15426 | + } | |
15427 | + | |
15428 | + /* validation ran on the raw sector image; convert it afterwards */ | |
15429 | + disk_gpt_header_to_cpu(hdr); | |
15430 | + | |
15431 | + return hdr; | |
15432 | +} | |
15433 | + | |
15434 | +/* | |
15435 | + * Function: Get GPT Information - picks a usable header (primary at | |
15436 | + * lsn 1, else the copy in the last vsector) and loads its partition | |
15437 | + * table. Returns 1 with *gh and *ptable set to buffers the caller must | |
15438 | + * kfree, or 0 with both left NULL. | |
15439 | + */ | |
15440 | +static int | |
15441 | +get_gpt_info(struct evms_logical_node *node, | |
15442 | + struct gpt_header **gh, struct gpt_partition **ptable) | |
15443 | +{ | |
15444 | + struct gpt_header *primary, *backup; | |
15445 | + | |
15446 | + *gh = NULL; | |
15447 | + *ptable = NULL; | |
15448 | + | |
15449 | + primary = get_gpt_header(node, 1); // offset past protective mbr | |
15450 | + if (primary != NULL) { | |
15451 | + backup = get_gpt_header(node, primary->alternate_lba); | |
15452 | + if (backup != NULL) | |
15453 | + kfree(backup); | |
15454 | + else | |
15455 | + LOG_WARNING | |
15456 | + ("alternate guid partition table header is invalid, using primary copy.\n"); | |
15457 | + *gh = primary; | |
15458 | + } else { | |
15459 | + backup = get_gpt_header(node, node->total_vsectors - 1); | |
15460 | + if (backup == NULL) { | |
15461 | + LOG_DETAILS("no gpt header discovered on node %s\n", | |
15462 | + node->name); | |
15463 | + return 0; | |
15464 | + } | |
15465 | + LOG_WARNING | |
15466 | + ("primary guid partition table header is invalid, using alternate copy\n"); | |
15467 | + *gh = backup; | |
15468 | + } | |
15469 | + | |
15470 | + *ptable = get_gpt_partition_table(node, *gh); | |
15471 | + if (*ptable != NULL) | |
15472 | + return 1; | |
15473 | + | |
15474 | + /* no usable table: drop the header so the caller sees only NULLs */ | |
15475 | + kfree(*gh); | |
15476 | + *gh = NULL; | |
15477 | + return 0; | |
15478 | +} | |
15479 | + | |
15480 | +/* | |
15481 | + * Function: Probe for GPT segments on logical node - exports a segment for | |
15482 | + * each used partition record; returns 1 if 'node' was consumed, else 0. | |
15483 | + */ | |
15484 | +static int | |
15485 | +probe_for_segments(struct evms_logical_node **discover_list, | |
15486 | + struct evms_logical_node *node) | |
15487 | +{ | |
15488 | + int rc; | |
15489 | + int nextminor = 1; | |
15490 | + int evms_top_segment; | |
15491 | + u32 i; | |
15492 | + u64 pstart,pend; | |
15493 | + struct gpt_header *gh = NULL; | |
15494 | + struct gpt_partition *ptable = NULL; | |
15495 | + struct gpt_partition *part = NULL; | |
15496 | + | |
15497 | + /* no need to inspect our own nodes */ | |
15498 | + if (node->plugin->id == plugin_header.id) | |
15499 | + return 0; | |
15500 | + | |
15501 | + /* nor nodes marked as EVMS_TOP_SEGMENT */ | |
15502 | + if (node->iflags & EVMS_TOP_SEGMENT) | |
15503 | + return 0; | |
15504 | + | |
15505 | + /* look for guid partition table & header; get_gpt_info() leaves both NULL on failure */ | |
15506 | + if (!get_gpt_info(node, &gh, &ptable)) | |
15507 | + return 0; | |
15508 | + | |
15509 | + /* records sit ptable_entry_size bytes apart; one smaller than our | |
15510 | + * struct cannot be read without running off the table buffer | |
15511 | + */ | |
15512 | + if (gh->ptable_entry_size < sizeof (*part)) { | |
15513 | + kfree(ptable); | |
15514 | + kfree(gh); | |
15515 | + return 0; | |
15516 | + } | |
15517 | + | |
15518 | + /* walk the guid partition table, producing segment storage objects */ | |
15519 | + for (i = 0; i < gh->ptable_count; i++) { | |
15520 | + part = (struct gpt_partition *) | |
15521 | + ((char *) ptable + i * gh->ptable_entry_size); | |
15522 | + if (isa_unused_gpt_partition_record(part)) | |
15523 | + continue; | |
15524 | + | |
15525 | + pstart = le64_to_cpu(part->start); | |
15526 | + pend = le64_to_cpu(part->end); | |
15527 | + if (pend < pstart) { | |
15528 | + /* end lba is inclusive; the size below would wrap */ | |
15529 | + LOG_WARNING("gpt partition record %u is malformed, skipping\n", i); | |
15530 | + continue; | |
15531 | + } | |
15532 | + LOG_DETAILS | |
15533 | + ("gpt partition start="PFU64" size="PFU64"\n", | |
15534 | + pstart, (pend - pstart + 1)); | |
15535 | + | |
15536 | + /* stop other seg mgrs from recursive discovery on a gpt system partition */ | |
15537 | + evms_top_segment = isa_esp_gpt_partition_record(part) ? 1 : 0; | |
15538 | + | |
15539 | + rc = process_segment(discover_list, node, pstart, | |
15540 | + (pend - pstart + 1), | |
15541 | + 0, nextminor, evms_top_segment); | |
15542 | + if (!rc) | |
15543 | + ++nextminor; /* numbers are used up by exported segments only */ | |
15544 | + } | |
15545 | + | |
15546 | + /* remove node we just consumed */ | |
15547 | + evms_cs_remove_logical_node_from_list(discover_list, node); | |
15548 | + kfree(ptable); | |
15549 | + kfree(gh); | |
15550 | + return 1; | |
15551 | +} | |
15552 | + | |
15553 | +/* | |
15554 | + * Function: partition_discover - probe every node on the discover list | |
15555 | + * for a gpt; returns the number of nodes exported (0 if none). | |
15556 | + */ | |
15557 | +static int | |
15558 | +partition_discover(struct evms_logical_node **discover_list) | |
15559 | +{ | |
15560 | + struct evms_logical_node *cur, *following; | |
15561 | + int rc = 0; | |
15562 | + | |
15563 | + MOD_INC_USE_COUNT; | |
15564 | + LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__); | |
15565 | + | |
15566 | + /* process_segment() counts its exports in this global */ | |
15567 | + exported_nodes = 0; | |
15568 | + | |
15569 | + /* the successor is fetched before probing, since a probe may | |
15570 | + * take the current node off the list | |
15571 | + */ | |
15572 | + for (cur = *discover_list; cur != NULL; cur = following) { | |
15573 | + following = cur->next; | |
15574 | + probe_for_segments(discover_list, cur); | |
15575 | + } | |
15576 | + | |
15577 | + /* rc is still 0 here, so the trace always shows error code 0 */ | |
15578 | + LOG_ENTRY_EXIT("%s: EXIT(exported nodes:%d, error code:%d)\n", | |
15579 | + __FUNCTION__, exported_nodes, rc); | |
15580 | + rc = exported_nodes; | |
15581 | + MOD_DEC_USE_COUNT; | |
15582 | + return rc; | |
15583 | +} | |
15584 | + | |
15585 | +/* | |
15586 | + * Function: partition_delete - unlink a segment exported by this plugin | |
15587 | + * from its disk's segment list and free it. Returns -ENODEV for a NULL | |
15588 | + * segment, otherwise the result of remove_segment_from_disk() (0 if the | |
15589 | + * segment carries no private data). | |
15590 | + */ | |
15591 | +static int | |
15592 | +partition_delete(struct evms_logical_node *segment) | |
15593 | +{ | |
15594 | + int rc = 0; | |
15595 | + struct gpt_private *gpt_prv; | |
15596 | + struct evms_logical_node *empty_disk = NULL; | |
15597 | + | |
15598 | + /* must precede the log line below, which reads segment->name */ | |
15599 | + if (!segment) | |
15600 | + return -ENODEV; | |
15601 | + | |
15602 | + LOG_DETAILS("deleting segment '%s'.\n", segment->name); | |
15603 | + | |
15604 | + gpt_prv = segment->private; | |
15605 | + if (gpt_prv) { | |
15606 | + /* remove the segment from the disk's segment list */ | |
15607 | + rc = remove_segment_from_disk(gpt_prv->source_disk, | |
15608 | + segment, &empty_disk); | |
15609 | + /* free the local instance data */ | |
15610 | + kfree(gpt_prv); | |
15611 | + } | |
15612 | + /* free the segment node */ | |
15613 | + evms_cs_deallocate_logical_node(segment); | |
15614 | + MOD_DEC_USE_COUNT; | |
15615 | + /* if the last segment on the disk was | |
15616 | + * deleted, delete the disk node too | |
15617 | + */ | |
15618 | + if (empty_disk) | |
15619 | + DELETE(empty_disk); | |
15620 | + return (rc); | |
15621 | +} | |
15622 | + | |
15623 | +/* | |
15624 | + * function: partition_io_error | |
15625 | + * | |
15626 | + * out-of-line failure path for requests that fall outside the | |
15627 | + * partition. it is a real function (rather than the inline | |
15628 | + * buffer_IO_error) so that kgdb can break on it, and it logs | |
15629 | + * the failing request before completing the buffer head. | |
15630 | + */ | |
15631 | +static void | |
15632 | +partition_io_error(struct evms_logical_node *node, int io_flag, | |
15633 | + struct buffer_head *bh) | |
15634 | +{ | |
15635 | + const char *direction = (io_flag) ? "WRITE" : "READ"; | |
15636 | + | |
15637 | + LOG_SERIOUS | |
15638 | + ("attempt to %s beyond partition boundary("PFU64") on (%s), rsector(%ld).\n", | |
15639 | + direction, node->total_vsectors - 1, node->name, bh->b_rsector); | |
15640 | + | |
15641 | + /* complete the request through its own callback with a status of 0 */ | |
15642 | + bh->b_end_io(bh, 0); | |
15643 | +} | |
15644 | + | |
15645 | +/* | |
15646 | + * Function: partition_read - bounds check a read against the partition, | |
15647 | + * then rebase it onto the source disk and pass it down | |
15648 | + */ | |
15649 | +static void | |
15650 | +partition_read(struct evms_logical_node *partition, struct buffer_head *bh) | |
15651 | +{ | |
15652 | + struct gpt_private *gpt_prv = partition->private; | |
15653 | + | |
15654 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) > partition->total_vsectors) { | |
15655 | + partition_io_error(partition, READ, bh); | |
15656 | + return; | |
15657 | + } | |
15658 | + bh->b_rsector += gpt_prv->start_sect; /* partition-relative -> disk-relative */ | |
15659 | + R_IO(gpt_prv->source_disk, bh); | |
15660 | +} | |
15661 | + | |
15662 | +/* | |
15663 | + * Function: partition_write - bounds check a write against the partition, | |
15664 | + * then rebase it onto the source disk and pass it down | |
15665 | + */ | |
15666 | +static void | |
15667 | +partition_write(struct evms_logical_node *partition, struct buffer_head *bh) | |
15668 | +{ | |
15669 | + struct gpt_private *gpt_prv = partition->private; | |
15670 | + | |
15671 | + if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) > partition->total_vsectors) { | |
15672 | + partition_io_error(partition, WRITE, bh); | |
15673 | + return; | |
15674 | + } | |
15675 | + bh->b_rsector += gpt_prv->start_sect; /* partition-relative -> disk-relative */ | |
15676 | + W_IO(gpt_prv->source_disk, bh); | |
15677 | +} | |
15678 | + | |
15679 | +/* | |
15680 | + * Function: partition_init_io - synchronous i/o on a range given relative | |
15681 | + * to the partition; returns INIT_IO's result, or -EINVAL if out of range | |
15682 | + */ | |
15683 | +static int | |
15684 | +partition_init_io(struct evms_logical_node *partition, int io_flag, /* 0=read, 1=write */ | |
15685 | + u64 sect_nr, /* partition-relative LBA */ | |
15686 | + u64 num_sects, /* # of sectors */ | |
15687 | + void *buf_addr) | |
15688 | +{ /* buffer address */ | |
15689 | + struct gpt_private *gpt_prv = partition->private; | |
15690 | + | |
15691 | + /* two comparisons instead of (sect_nr + num_sects), so a huge | |
15692 | + * value cannot wrap the u64 sum and slip past the boundary check | |
15693 | + */ | |
15694 | + if (num_sects > partition->total_vsectors || | |
15695 | + sect_nr > partition->total_vsectors - num_sects) { | |
15696 | + LOG_SERIOUS | |
15697 | + ("init_io: attempt to %s beyond partition(%s) boundary("PFU64") at sector("PFU64") for count("PFU64").\n", | |
15698 | + (io_flag) ? "WRITE" : "READ", partition->name, | |
15699 | + (gpt_prv->nr_sects - 1), sect_nr, num_sects); | |
15700 | + return -EINVAL; | |
15701 | + } | |
15702 | + /* rebase onto the source disk before passing the request down */ | |
15703 | + return INIT_IO(gpt_prv->source_disk, io_flag, | |
15704 | + sect_nr + gpt_prv->start_sect, num_sects, buf_addr); | |
15705 | +} | |
15706 | + | |
15707 | +/* | |
15708 | + * Function: partition_ioctl - forwards ioctls to the source disk, adjusting | |
15709 | + * HDIO_GETGEO and EVMS_GET_BMAP for the partition's starting sector | |
15710 | + */ | |
15711 | +static int | |
15712 | +partition_ioctl(struct evms_logical_node *partition, | |
15713 | + struct inode *inode, | |
15714 | + struct file *file, unsigned int cmd, unsigned long arg) | |
15715 | +{ | |
15716 | + struct gpt_private *gpt_prv; | |
15717 | + struct hd_geometry hd_geo; | |
15718 | + int rc; | |
15719 | + | |
15720 | + rc = 0; | |
15721 | + gpt_prv = partition->private; | |
15722 | + if (!inode) | |
15723 | + return -EINVAL; | |
15724 | + switch (cmd) { | |
15725 | + case HDIO_GETGEO: | |
15726 | + { | |
15727 | + rc = IOCTL(gpt_prv->source_disk, inode, file, cmd, arg); /* let the source disk fill in the geometry first */ | |
15728 | + if (rc) | |
15729 | + break; | |
15730 | + if (copy_from_user | |
15731 | + (&hd_geo, (void *) arg, | |
15732 | + sizeof (struct hd_geometry))) | |
15733 | + rc = -EFAULT; | |
15734 | + if (rc) | |
15735 | + break; | |
15736 | + hd_geo.start = gpt_prv->start_sect; /* report this partition's start instead */ | |
15737 | + if (copy_to_user | |
15738 | + ((void *) arg, &hd_geo, | |
15739 | + sizeof (struct hd_geometry))) | |
15740 | + rc = -EFAULT; | |
15741 | + } | |
15742 | + break; | |
15743 | + case EVMS_GET_BMAP: | |
15744 | + { | |
15745 | + struct evms_get_bmap_pkt *bmap = | |
15746 | + (struct evms_get_bmap_pkt *) arg; /* NOTE(review): arg is dereferenced directly, unlike HDIO_GETGEO - confirm it is always a kernel pointer */ | |
15747 | + bmap->rsector += gpt_prv->start_sect; | |
15748 | + /* intentionally fall thru to | |
15749 | + * default ioctl down to device | |
15750 | + * manager. | |
15751 | + */ | |
15752 | + } | |
15753 | + default: | |
15754 | + rc = IOCTL(gpt_prv->source_disk, inode, file, cmd, arg); | |
15755 | + } | |
15756 | + return rc; | |
15757 | +} | |
15758 | + | |
15759 | +/* | |
15760 | + * Function: gpt_module_init - module entry point (see module_init below); | |
15761 | + * returns whatever evms_cs_register_plugin() returns | |
15762 | + */ | |
15763 | +static int __init | |
15764 | +gpt_module_init(void) | |
15765 | +{ | |
15766 | + return evms_cs_register_plugin(&plugin_header); /* register with EVMS */ | |
15767 | +} | |
15768 | + | |
15769 | +/* | |
15770 | + * Function: gpt module exit - undoes gpt_module_init by unregistering the plugin | |
15771 | + */ | |
15772 | +static void __exit | |
15773 | +gpt_module_exit(void) | |
15774 | +{ | |
15775 | + evms_cs_unregister_plugin(&plugin_header); | |
15776 | +} | |
15777 | + | |
15778 | +module_init(gpt_module_init); /* hook the plugin's init routine into module/boot initialisation */ | |
15779 | +module_exit(gpt_module_exit); /* and its exit routine into module removal */ | |
15780 | +#ifdef MODULE_LICENSE /* only declare a license where the macro exists */ | |
15781 | +MODULE_LICENSE("GPL"); | |
15782 | +#endif | |
15783 | diff -Naur linux-2002-09-30/drivers/evms/ldev_mgr.c evms-2002-09-30/drivers/evms/ldev_mgr.c | |
15784 | --- linux-2002-09-30/drivers/evms/ldev_mgr.c Wed Dec 31 18:00:00 1969 | |
15785 | +++ evms-2002-09-30/drivers/evms/ldev_mgr.c Fri Sep 13 16:45:06 2002 | |
15786 | @@ -0,0 +1,1500 @@ | |
15787 | +/* -*- linux-c -*- */ | |
15788 | +/* | |
15789 | + * | |
15790 | + * Copyright (c) International Business Machines Corp., 2000 | |
15791 | + * | |
15792 | + * This program is free software; you can redistribute it and/or modify | |
15793 | + * it under the terms of the GNU General Public License as published by | |
15794 | + * the Free Software Foundation; either version 2 of the License, or | |
15795 | + * (at your option) any later version. | |
15796 | + * | |
15797 | + * This program is distributed in the hope that it will be useful, | |
15798 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15799 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
15800 | + * the GNU General Public License for more details. | |
15801 | + * | |
15802 | + * You should have received a copy of the GNU General Public License | |
15803 | + * along with this program; if not, write to the Free Software | |
15804 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15805 | + */ | |
15806 | + | |
15807 | +/* linux/drivers/evms/ldev_mgr.c | |
15808 | + * | |
15809 | + * EVMS - Local Device (Hard Drive) Manager | |
15810 | + * | |
15811 | + * This plugin walks the gendisk list and creates logical disk structures for each | |
15812 | + * local ide or scsi device. | |
15813 | + * | |
15814 | + */ | |
15815 | + | |
15816 | +#include <linux/config.h> | |
15817 | +#include <linux/module.h> | |
15818 | +#include <linux/errno.h> | |
15819 | +#include <linux/kernel.h> | |
15820 | +#include <linux/fs.h> | |
15821 | +#include <linux/slab.h> | |
15822 | +#include <asm/uaccess.h> | |
15823 | +#include <linux/blk.h> /* must be included by all block drivers */ | |
15824 | +#include <linux/genhd.h> | |
15825 | +#include <linux/ide.h> | |
15826 | +#include <linux/version.h> | |
15827 | +#include "../scsi/scsi.h" | |
15828 | +#include "../scsi/sd.h" | |
15829 | +#include <linux/init.h> | |
15830 | +#include <linux/evms/evms.h> | |
15831 | +#include <linux/evms/ldev_mgr.h> | |
15832 | + | |
15833 | +#define LOG_PREFIX "ldev_mgr: " /* presumably prepended to this plugin's log messages by the LOG_* macros - confirm in evms.h */ | |
15834 | + | |
15835 | +#define EVMS_LOCAL_DEVICE_MANAGER_ID 1 /* third component of the plugin id built with SetPluginID() below */ | |
15836 | + | |
15837 | +/** | |
15838 | + * struct ldev_private - private data used by this plugin | |
15839 | + * @major: major device number | |
15840 | + * @minor: minor device number | |
15841 | + * @bdev: block_device record for this device | |
15842 | + * @gd: gendisk entry for this device | |
15843 | + * @media_changed: media changed status field | |
15844 | + * | |
15845 | + * private data maintained for each device by this plugin | |
15846 | + * (NOTE(review): presumably reached via the logical node's private pointer - confirm) | |
15847 | + **/ struct ldev_private { | |
15848 | + int major, minor; | |
15849 | + struct block_device *bdev; | |
15850 | + struct gendisk *gd; | |
15851 | + int media_changed; | |
15852 | +}; | |
15853 | + | |
15854 | +/* prototypes for mandatory plugin interface functions (wired into 'fops' below) */ | |
15855 | +static int discover_disks(struct evms_logical_node **); /* fops.discover */ | |
15856 | +static int ldev_mgr_delete(struct evms_logical_node *); /* fops.delete */ | |
15857 | +static void ldev_mgr_read(struct evms_logical_node *, struct buffer_head *); /* fops.read */ | |
15858 | +static void ldev_mgr_write(struct evms_logical_node *, struct buffer_head *); /* fops.write */ | |
15859 | +static int ldev_mgr_ioctl(struct evms_logical_node *, | |
15860 | + struct inode *, | |
15861 | + struct file *, unsigned int, unsigned long); /* fops.ioctl */ | |
15862 | +static int ldev_init_io(struct evms_logical_node *, | |
15863 | + int, u64, u64, void *); /* fops.init_io */ | |
15864 | +static int ldev_mgr_direct_ioctl(struct inode *, | |
15865 | + struct file *, unsigned int, unsigned long); /* fops.direct_ioctl */ | |
15866 | + | |
15867 | +/* plugin function table definition: the entry points referenced by plugin_header.fops */ | |
15868 | +static struct evms_plugin_fops fops = { | |
15869 | + .discover = discover_disks, | |
15870 | + .delete = ldev_mgr_delete, | |
15871 | + .read = ldev_mgr_read, | |
15872 | + .write = ldev_mgr_write, | |
15873 | + .init_io = ldev_init_io, | |
15874 | + .ioctl = ldev_mgr_ioctl, | |
15875 | + .direct_ioctl = ldev_mgr_direct_ioctl | |
15876 | +}; | |
15877 | + | |
15878 | +/* plugin header definition: identity, version and entry points of this plugin */ | |
15879 | +static struct evms_plugin_header plugin_header = { | |
15880 | + .id = SetPluginID(IBM_OEM_ID, | |
15881 | + EVMS_DEVICE_MANAGER, | |
15882 | + EVMS_LOCAL_DEVICE_MANAGER_ID), | |
15883 | + .version = { /* version of this plugin: 1.1.1 */ | |
15884 | + .major = 1, | |
15885 | + .minor = 1, | |
15886 | + .patchlevel = 1 | |
15887 | + }, | |
15888 | + .required_services_version = { /* common services level asked for: 0.5.0 */ | |
15889 | + .major = 0, | |
15890 | + .minor = 5, | |
15891 | + .patchlevel = 0 | |
15892 | + }, | |
15893 | + .fops = &fops | |
15894 | +}; | |
15895 | + | |
15896 | +#define TYPE_NONE 0 /* TYPE_*: values for blk_device_info.device_type */ | |
15897 | +#define TYPE_GENERIC 1 | |
15898 | +#define TYPE_IDE 2 | |
15899 | +#define TYPE_SCSI 3 | |
15900 | + /* INDEX_*: values for blk_device_info.devnode_name_type */ | |
15901 | +#define INDEX_ALPHA 0 | |
15902 | +#define INDEX_NUMERIC 1 | |
15903 | + | |
15904 | +/********************************************************/ | |
15905 | +/* Required Plugin Function Table Entry Point: */ | |
15906 | +/* Discover function & Support routines */ | |
15907 | +/********************************************************/ | |
15908 | + | |
15909 | +#define MAX_NAME_BASE_SIZE 10 /* capacity of devnode_name_base, terminator excluded */ | |
15910 | +#define MAX_NAME_MODIFIER_SIZE 4 /* capacity of devnode_name_modifier, terminator excluded */ | |
15911 | +/** | |
15912 | + * struct blk_device_info - block device info | |
15913 | + * @devnode_name_base: base name (ie. hd or sd) for device | |
15914 | + * @null1: guaranteed end-of-string NUL for @devnode_name_base | |
15915 | + * @devnode_name_modifier: name suffix (ie. ag for sdag) for device | |
15916 | + * @null2: guaranteed end-of-string NUL for @devnode_name_modifier | |
15917 | + * @devnode_name_index: numeric device index (ie. 1 for hda1) | |
15918 | + * @devnode_name_type: indicates numeric or alpha modifier (INDEX_*) | |
15919 | + * @device_type: device type, IDE, SCSI, or GENERIC (TYPE_*) | |
15920 | + * | |
15921 | + * generic block device naming descriptor structure | |
15922 | + **/ | |
15923 | +struct blk_device_info { | |
15924 | + char devnode_name_base[MAX_NAME_BASE_SIZE]; | |
15925 | + char null1; | |
15926 | + char devnode_name_modifier[MAX_NAME_MODIFIER_SIZE]; | |
15927 | + char null2; | |
15928 | + int devnode_name_index; | |
15929 | + int devnode_name_type; | |
15930 | + int device_type; | |
15931 | +}; | |
15932 | + | |
15933 | +static struct blk_device_info *blk_dev_info = NULL; /* naming table indexed by block major (see BLK_DEV_INFO); it is allocated outside this part of the file */ | |
15934 | + | |
15935 | +#define BLK_DEV_INFO(a,b,c,d,e) \ | |
15936 | + strncpy(blk_dev_info[a].devnode_name_base, b, MAX_NAME_BASE_SIZE); \ | |
15937 | + blk_dev_info[a].null1 = 0; \ | |
15938 | + strncpy(blk_dev_info[a].devnode_name_modifier, c, MAX_NAME_MODIFIER_SIZE); \ | |
15939 | + blk_dev_info[a].null2 = 0; \ | |
15940 | + blk_dev_info[a].devnode_name_index = 0; \ | |
15941 | + blk_dev_info[a].device_type = d; \ | |
15942 | + blk_dev_info[a].devnode_name_type = e; | |
15943 | +/* init_blk_dev_info - fill the naming table passed in (the parameter shadows the file-scope pointer used by BLK_DEV_INFO) for every known block major */ | |
15944 | +static void | |
15945 | +init_blk_dev_info(struct blk_device_info *blk_dev_info) | |
15946 | +{ | |
15947 | + BLK_DEV_INFO(IDE0_MAJOR, "hd", "a", TYPE_IDE, INDEX_ALPHA); | |
15948 | + BLK_DEV_INFO(IDE1_MAJOR, "hd", "c", TYPE_IDE, INDEX_ALPHA); | |
15949 | + BLK_DEV_INFO(IDE2_MAJOR, "hd", "e", TYPE_IDE, INDEX_ALPHA); | |
15950 | + BLK_DEV_INFO(IDE3_MAJOR, "hd", "g", TYPE_IDE, INDEX_ALPHA); | |
15951 | + BLK_DEV_INFO(IDE4_MAJOR, "hd", "i", TYPE_IDE, INDEX_ALPHA); | |
15952 | + BLK_DEV_INFO(IDE5_MAJOR, "hd", "k", TYPE_IDE, INDEX_ALPHA); | |
15953 | + BLK_DEV_INFO(IDE6_MAJOR, "hd", "m", TYPE_IDE, INDEX_ALPHA); | |
15954 | + BLK_DEV_INFO(IDE7_MAJOR, "hd", "o", TYPE_IDE, INDEX_ALPHA); | |
15955 | + BLK_DEV_INFO(IDE8_MAJOR, "hd", "q", TYPE_IDE, INDEX_ALPHA); | |
15956 | + BLK_DEV_INFO(IDE9_MAJOR, "hd", "s", TYPE_IDE, INDEX_ALPHA); | |
15957 | + /* scsi disks */ | |
15958 | + BLK_DEV_INFO(SCSI_DISK0_MAJOR, "sd", "a", TYPE_SCSI, INDEX_ALPHA); | |
15959 | + BLK_DEV_INFO(SCSI_DISK1_MAJOR, "sd", "q", TYPE_SCSI, INDEX_ALPHA); | |
15960 | + BLK_DEV_INFO(SCSI_DISK2_MAJOR, "sd", "ag", TYPE_SCSI, INDEX_ALPHA); | |
15961 | + BLK_DEV_INFO(SCSI_DISK3_MAJOR, "sd", "aw", TYPE_SCSI, INDEX_ALPHA); | |
15962 | + BLK_DEV_INFO(SCSI_DISK4_MAJOR, "sd", "bm", TYPE_SCSI, INDEX_ALPHA); | |
15963 | + BLK_DEV_INFO(SCSI_DISK5_MAJOR, "sd", "cc", TYPE_SCSI, INDEX_ALPHA); | |
15964 | + BLK_DEV_INFO(SCSI_DISK6_MAJOR, "sd", "cs", TYPE_SCSI, INDEX_ALPHA); | |
15965 | + BLK_DEV_INFO(SCSI_DISK7_MAJOR, "sd", "di", TYPE_SCSI, INDEX_ALPHA); | |
15966 | + /* everything below is treated as a generic block device */ | |
15967 | + BLK_DEV_INFO(XT_DISK_MAJOR, "xd", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15968 | + | |
15969 | + BLK_DEV_INFO(CYCLADES_MAJOR, "double", "0", TYPE_GENERIC, | |
15970 | + INDEX_NUMERIC); | |
15971 | + | |
15972 | + BLK_DEV_INFO(MFM_ACORN_MAJOR, "mfm", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15973 | + | |
15974 | + BLK_DEV_INFO(ACSI_MAJOR, "ad", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15975 | + | |
15976 | + BLK_DEV_INFO(PS2ESDI_MAJOR, "ed", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15977 | + | |
15978 | + BLK_DEV_INFO(40, "ez", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15979 | + BLK_DEV_INFO(43, "nb", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
15980 | + BLK_DEV_INFO(44, "ftl", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15981 | + BLK_DEV_INFO(45, "pd", "a", TYPE_GENERIC, INDEX_ALPHA); | |
15982 | + BLK_DEV_INFO(47, "pf", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
15983 | + | |
15984 | + BLK_DEV_INFO(DAC960_MAJOR + 0, "rd/c0d", "0", TYPE_GENERIC, | |
15985 | + INDEX_NUMERIC); | |
15986 | + BLK_DEV_INFO(DAC960_MAJOR + 1, "rd/c1d", "0", TYPE_GENERIC, | |
15987 | + INDEX_NUMERIC); | |
15988 | + BLK_DEV_INFO(DAC960_MAJOR + 2, "rd/c2d", "0", TYPE_GENERIC, | |
15989 | + INDEX_NUMERIC); | |
15990 | + BLK_DEV_INFO(DAC960_MAJOR + 3, "rd/c3d", "0", TYPE_GENERIC, | |
15991 | + INDEX_NUMERIC); | |
15992 | + BLK_DEV_INFO(DAC960_MAJOR + 4, "rd/c4d", "0", TYPE_GENERIC, | |
15993 | + INDEX_NUMERIC); | |
15994 | + BLK_DEV_INFO(DAC960_MAJOR + 5, "rd/c5d", "0", TYPE_GENERIC, | |
15995 | + INDEX_NUMERIC); | |
15996 | + BLK_DEV_INFO(DAC960_MAJOR + 6, "rd/c6d", "0", TYPE_GENERIC, | |
15997 | + INDEX_NUMERIC); | |
15998 | + BLK_DEV_INFO(DAC960_MAJOR + 7, "rd/c7d", "0", TYPE_GENERIC, | |
15999 | + INDEX_NUMERIC); | |
16000 | + | |
16001 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR, "ida/c0d", "0", TYPE_GENERIC, | |
16002 | + INDEX_NUMERIC); | |
16003 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR1, "ida/c1d", "0", TYPE_GENERIC, | |
16004 | + INDEX_NUMERIC); | |
16005 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR2, "ida/c2d", "0", TYPE_GENERIC, | |
16006 | + INDEX_NUMERIC); | |
16007 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR3, "ida/c3d", "0", TYPE_GENERIC, | |
16008 | + INDEX_NUMERIC); | |
16009 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR4, "ida/c4d", "0", TYPE_GENERIC, | |
16010 | + INDEX_NUMERIC); | |
16011 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR5, "ida/c5d", "0", TYPE_GENERIC, | |
16012 | + INDEX_NUMERIC); | |
16013 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR6, "ida/c6d", "0", TYPE_GENERIC, | |
16014 | + INDEX_NUMERIC); | |
16015 | + BLK_DEV_INFO(COMPAQ_SMART2_MAJOR7, "ida/c7d", "0", TYPE_GENERIC, | |
16016 | + INDEX_NUMERIC); | |
16017 | + | |
16018 | + BLK_DEV_INFO(I2O_MAJOR + 0, "i2o/hd", "a", TYPE_GENERIC, INDEX_ALPHA); | |
16019 | + BLK_DEV_INFO(I2O_MAJOR + 1, "i2o/hd", "q", TYPE_GENERIC, INDEX_ALPHA); | |
16020 | + BLK_DEV_INFO(I2O_MAJOR + 2, "i2o/hd", "ag", TYPE_GENERIC, INDEX_ALPHA); | |
16021 | + BLK_DEV_INFO(I2O_MAJOR + 3, "i2o/hd", "aw", TYPE_GENERIC, INDEX_ALPHA); | |
16022 | + BLK_DEV_INFO(I2O_MAJOR + 4, "i2o/hd", "bm", TYPE_GENERIC, INDEX_ALPHA); | |
16023 | + BLK_DEV_INFO(I2O_MAJOR + 5, "i2o/hd", "cc", TYPE_GENERIC, INDEX_ALPHA); | |
16024 | + BLK_DEV_INFO(I2O_MAJOR + 6, "i2o/hd", "cs", TYPE_GENERIC, INDEX_ALPHA); | |
16025 | + BLK_DEV_INFO(I2O_MAJOR + 7, "i2o/hd", "di", TYPE_GENERIC, INDEX_ALPHA); | |
16026 | + | |
16027 | + BLK_DEV_INFO(92, "ppdd", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16028 | + BLK_DEV_INFO(93, "nftl", "a", TYPE_GENERIC, INDEX_ALPHA); | |
16029 | + | |
16030 | + BLK_DEV_INFO(DASD_MAJOR, "dasd", "a", TYPE_GENERIC, INDEX_ALPHA); | |
16031 | + BLK_DEV_INFO(MDISK_MAJOR, "mdisk", "a", TYPE_GENERIC, INDEX_ALPHA); | |
16032 | + | |
16033 | + BLK_DEV_INFO(96, "msd", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16034 | + BLK_DEV_INFO(97, "pktcdvd", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16035 | + | |
16036 | + BLK_DEV_INFO(UBD_MAJOR, "ubd", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16037 | + | |
16038 | + BLK_DEV_INFO(JSFD_MAJOR, "jsfd", "", TYPE_GENERIC, INDEX_NUMERIC); | |
16039 | + | |
16040 | + BLK_DEV_INFO(101, "amiraid/ar", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16041 | + /* cciss controllers c0..c7 occupy the consecutive majors 104..111 */ | |
16042 | + BLK_DEV_INFO(104, "cciss/c0d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16043 | + BLK_DEV_INFO(105, "cciss/c1d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16044 | + BLK_DEV_INFO(106, "cciss/c2d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16045 | + BLK_DEV_INFO(107, "cciss/c3d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16046 | + BLK_DEV_INFO(108, "cciss/c4d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16047 | + BLK_DEV_INFO(109, "cciss/c5d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16048 | + BLK_DEV_INFO(110, "cciss/c6d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16049 | + BLK_DEV_INFO(111, "cciss/c7d", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16050 | + | |
16051 | + BLK_DEV_INFO(RAW_MAJOR, "raw", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16052 | + | |
16053 | + BLK_DEV_INFO(VXVM_MAJOR, "vx/dsk", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16054 | + BLK_DEV_INFO(VXDMP_MAJOR, "vx/dmp", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16055 | + BLK_DEV_INFO(LOOP_MAJOR, "loop", "0", TYPE_GENERIC, INDEX_NUMERIC); | |
16056 | +} | |
16057 | + | |
16058 | +static int | |
16059 | +is_in_device_list(struct gendisk *gd, int major, int minor) | |
16060 | +{ | |
16061 | + int found, done, rc; | |
16062 | + struct evms_logical_node *device = NULL; | |
16063 | + struct ldev_private *ldev_prv; | |
16064 | + | |
16065 | + done = found = FALSE; | |
16066 | + while (done == FALSE) { | |
16067 | + rc = evms_cs_find_next_device(device, &device); | |
16068 | + if (rc || !device) | |
16069 | + done = TRUE; | |
16070 | + else { | |
16071 | + ldev_prv = device->private; | |
16072 | + if (ldev_prv->gd == gd) | |
16073 | + if (ldev_prv->major == major) | |
16074 | + if (ldev_prv->minor == minor) | |
16075 | + done = found = TRUE; | |
16076 | + } | |
16077 | + } | |
16078 | + return (found); | |
16079 | +} | |
16080 | + | |
16081 | +static void | |
16082 | +build_devnode_name(char *name_buf, int major) | |
16083 | +{ | |
16084 | + char buf[11], *modifier, *buf_ptr; | |
16085 | + int int_mod, done; | |
16086 | + struct blk_device_info *bdi; | |
16087 | + | |
16088 | + bdi = &blk_dev_info[major]; | |
16089 | + | |
16090 | + /* convert the base name modifier to an integer */ | |
16091 | + modifier = bdi->devnode_name_modifier; | |
16092 | + int_mod = 0; | |
16093 | + while (*modifier) { | |
16094 | + if (bdi->devnode_name_type == INDEX_ALPHA) { | |
16095 | + int_mod *= 26; | |
16096 | + int_mod += *modifier - 'a'; | |
16097 | + } else { | |
16098 | + int_mod *= 10; | |
16099 | + int_mod += *modifier - '0'; | |
16100 | + } | |
16101 | + modifier++; | |
16102 | + if (*modifier) { | |
16103 | + int_mod++; | |
16104 | + } | |
16105 | + } | |
16106 | + /* add in device_index_value */ | |
16107 | + int_mod += bdi->devnode_name_index; | |
16108 | + bdi->devnode_name_index++; | |
16109 | + | |
16110 | + /* convert integer modifier back to ALPHA/NUMERIC chars */ | |
16111 | + memset(buf, 0, sizeof (buf)); | |
16112 | + /* fill the buffer from the rear to front with the | |
16113 | + * ascii version of the modifier, leaving space for | |
16114 | + * NULL terminator at the end. | |
16115 | + */ | |
16116 | + buf_ptr = &buf[sizeof (buf) - 2]; | |
16117 | + done = FALSE; | |
16118 | + do { | |
16119 | + if (bdi->devnode_name_type == INDEX_ALPHA) { | |
16120 | + *buf_ptr = (int_mod % 26) + 'a'; | |
16121 | + int_mod /= 26; | |
16122 | + } else { | |
16123 | + *buf_ptr = (int_mod % 10) + '0'; | |
16124 | + int_mod /= 10; | |
16125 | + } | |
16126 | + if (int_mod) { | |
16127 | + int_mod--; | |
16128 | + } else { | |
16129 | + done = TRUE; | |
16130 | + } | |
16131 | + buf_ptr--; | |
16132 | + } while (!done); | |
16133 | + | |
16134 | + /* find beginning of modifier in buffer */ | |
16135 | + modifier = buf; | |
16136 | + while (!*modifier) | |
16137 | + modifier++; | |
16138 | + | |
16139 | + /* build the final device devnode name */ | |
16140 | + sprintf(name_buf, "%s%s", bdi->devnode_name_base, modifier); | |
16141 | +} | |
16142 | + | |
16143 | +static int | |
16144 | +ldev_mgr_lock_device(struct ldev_private *ldev_prv) | |
16145 | +{ | |
16146 | + int rc; | |
16147 | + struct block_device *bdev; | |
16148 | + | |
16149 | + bdev = bdget(MKDEV(ldev_prv->major, ldev_prv->minor)); | |
16150 | + if (!bdev) | |
16151 | + return -ENOMEM; | |
16152 | + rc = blkdev_get(bdev, FMODE_READ | FMODE_WRITE, 0, BDEV_RAW); | |
16153 | + if (rc) | |
16154 | + return rc; | |
16155 | + ldev_prv->bdev = bdev; | |
16156 | + return 0; | |
16157 | +} | |
16158 | + | |
16159 | +static void | |
16160 | +ldev_mgr_unlock_device(struct ldev_private *ldev_prv) | |
16161 | +{ | |
16162 | + struct block_device *bdev = ldev_prv->bdev; | |
16163 | + ldev_prv->bdev = NULL; | |
16164 | + if (!bdev) { | |
16165 | + LOG_ERROR("error: NULL bdev field detected!\n"); | |
16166 | + BUG(); | |
16167 | + } | |
16168 | + blkdev_put(bdev, BDEV_RAW); | |
16169 | +} | |
16170 | + | |
16171 | +#define DEVICE_KNOWN 1234 | |
16172 | +#define DEVICE_UNINITIALIZED 1235 | |
16173 | +#define DEVICE_MEDIA_NOT_PRESENT 1236 | |
16174 | +static int | |
16175 | +create_logical_disk(struct evms_logical_node **disk_list, | |
16176 | + struct gendisk *gd, int device_index) | |
16177 | +{ | |
16178 | + int rc = 0, major, minor; | |
16179 | + struct evms_logical_node *new_disk = NULL; | |
16180 | + struct ldev_private *ldev_prv = NULL; | |
16181 | + char device_name[EVMS_VOLUME_NAME_SIZE + 1]; | |
16182 | + | |
16183 | + major = gd->major; | |
16184 | + minor = device_index << gd->minor_shift; | |
16185 | + | |
16186 | + /* skip uninitialized devices */ | |
16187 | + if (!blk_size[major]) | |
16188 | + rc = DEVICE_UNINITIALIZED; | |
16189 | + else if (!blk_size[major][minor]) | |
16190 | + rc = DEVICE_UNINITIALIZED; | |
16191 | + if (!rc) { | |
16192 | + /* construct the devnode name for this device */ | |
16193 | + build_devnode_name(device_name, major); | |
16194 | + | |
16195 | + /* skip devices we already know about */ | |
16196 | + if (is_in_device_list(gd, major, minor) == TRUE) | |
16197 | + rc = DEVICE_KNOWN; | |
16198 | + } | |
16199 | + /* allocate the new node */ | |
16200 | + if (!rc) { | |
16201 | + rc = evms_cs_allocate_logical_node(&new_disk); | |
16202 | + } | |
16203 | + /* allocate new nodes's instance data */ | |
16204 | + if (!rc) { | |
16205 | + ldev_prv = kmalloc(sizeof(struct ldev_private), GFP_KERNEL); | |
16206 | + if (!ldev_prv) | |
16207 | + rc = -ENOMEM; | |
16208 | + } | |
16209 | + /* initialize the new node */ | |
16210 | + if (!rc) { | |
16211 | + memset(ldev_prv, 0, sizeof(struct ldev_private)); | |
16212 | + new_disk->plugin = &plugin_header; | |
16213 | + | |
16214 | + /* initialize the instance data */ | |
16215 | + new_disk->private = ldev_prv; | |
16216 | + ldev_prv->gd = gd; | |
16217 | + ldev_prv->major = major; | |
16218 | + ldev_prv->minor = minor; | |
16219 | + rc = ldev_mgr_lock_device(ldev_prv); | |
16220 | + if (rc) { | |
16221 | + LOG_ERROR("error(%d): unable to lock device(%d,%d)!\n", | |
16222 | + rc, major, minor); | |
16223 | + } | |
16224 | + } | |
16225 | + if (!rc) { | |
16226 | + /* determine hardsector size */ | |
16227 | + new_disk->hardsector_size = 512; | |
16228 | + if (hardsect_size[major]) { | |
16229 | + new_disk->hardsector_size = hardsect_size[major][minor]; | |
16230 | + } | |
16231 | + /* save the block size */ | |
16232 | + new_disk->block_size = 1024; | |
16233 | + if (blksize_size[major]) { | |
16234 | + new_disk->block_size = blksize_size[major][minor]; | |
16235 | + } | |
16236 | + /* obtain the device size in sectors | |
16237 | + * | |
16238 | + * try 64bit size first, if that fails | |
16239 | + * fall back on the 32bit size. | |
16240 | + */ | |
16241 | + /* try 64bit size */ | |
16242 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18) | |
16243 | + rc = evms_cs_kernel_ioctl(new_disk, BLKGETSIZE64, | |
16244 | + (ulong) & new_disk->total_vsectors); | |
16245 | + if (!rc) { | |
16246 | + /* convert bytes to 512 byte sectors */ | |
16247 | + new_disk->total_vsectors >>= EVMS_VSECTOR_SIZE_SHIFT; | |
16248 | + } else | |
16249 | +#endif | |
16250 | + { | |
16251 | + /* try 32bit size */ | |
16252 | + ulong dev_size = 0; | |
16253 | + rc = evms_cs_kernel_ioctl(new_disk, BLKGETSIZE, | |
16254 | + (ulong) & dev_size); | |
16255 | + new_disk->total_vsectors = dev_size; | |
16256 | + } | |
16257 | + if (!rc && !new_disk->total_vsectors) { | |
16258 | + rc = -ENOSPC; | |
16259 | + } | |
16260 | + } | |
16261 | + if (!rc) { | |
16262 | + /* remember removable devices */ | |
16263 | + if (gd->flags) | |
16264 | + if (gd->flags[device_index] & GENHD_FL_REMOVABLE) | |
16265 | + new_disk->flags |= EVMS_DEVICE_REMOVABLE; | |
16266 | + | |
16267 | + /* save the devnode name for this device */ | |
16268 | + strcpy(new_disk->name, device_name); | |
16269 | + | |
16270 | + /* register this device with evms */ | |
16271 | + evms_cs_register_device(new_disk); | |
16272 | + MOD_INC_USE_COUNT; | |
16273 | + | |
16274 | + /* append this record the linked list */ | |
16275 | + evms_cs_add_logical_node_to_list(disk_list, new_disk); | |
16276 | + LOG_DETAILS | |
16277 | + ("added logical disk(%s) for physical disk(%u,%u,%s), size("PFU64") in 512 byte units\n", | |
16278 | + new_disk->name, major, minor, new_disk->name, | |
16279 | + new_disk->total_vsectors); | |
16280 | + | |
16281 | + } | |
16282 | + /* reset the "benign" error codes for the caller */ | |
16283 | + switch (rc) { | |
16284 | + case DEVICE_UNINITIALIZED: | |
16285 | + case DEVICE_KNOWN: | |
16286 | + case DEVICE_MEDIA_NOT_PRESENT: | |
16287 | + rc = 0; | |
16288 | + case 0: | |
16289 | + break; | |
16290 | + default: | |
16291 | + LOG_ERROR | |
16292 | + ("error(%d): creating logical disk for device(%d,%d).\n", | |
16293 | + rc, major, minor); | |
16294 | + if (new_disk) { | |
16295 | + evms_cs_deallocate_logical_node(new_disk); | |
16296 | + } | |
16297 | + if (ldev_prv) { | |
16298 | + kfree(ldev_prv); | |
16299 | + } | |
16300 | + break; | |
16301 | + } | |
16302 | + return (rc); | |
16303 | +} | |
16304 | + | |
16305 | +static int | |
16306 | +create_logical_generic_disks(struct evms_logical_node **disk_list, | |
16307 | + struct gendisk *gd) | |
16308 | +{ | |
16309 | + int rc, i; | |
16310 | + | |
16311 | + /* This is a generic device */ | |
16312 | + | |
16313 | + rc = 0; | |
16314 | + LOG_DEBUG("major name = %s\n", gd->major_name); | |
16315 | + LOG_DEBUG("number of real devices = %i\n", gd->nr_real); | |
16316 | + for (i = 0; i < gd->nr_real; i++) { | |
16317 | + LOG_DEBUG("device %d:\n", i); | |
16318 | + rc = create_logical_disk(disk_list, gd, i); | |
16319 | + if (rc) | |
16320 | + break; | |
16321 | + } | |
16322 | + return (rc); | |
16323 | +} | |
16324 | + | |
16325 | +static int | |
16326 | +create_logical_ide_disks(struct evms_logical_node **disk_list, | |
16327 | + struct gendisk *gd) | |
16328 | +{ | |
16329 | + int rc = 0, i; | |
16330 | + ide_hwif_t *ide_hwif; | |
16331 | + ide_drive_t *drive; | |
16332 | + | |
16333 | + /* This is an IDE device */ | |
16334 | + LOG_DEBUG("found IDE major : %i - searching for disks\n", gd->major); | |
16335 | + | |
16336 | + ide_hwif = gd->real_devices; /* IDE internal data */ | |
16337 | + for (i = 0; i < MAX_DRIVES; i++) { | |
16338 | + drive = &(ide_hwif->drives[i]); | |
16339 | + if (drive->present && (drive->media == ide_disk)) { | |
16340 | + /* force the name index value on ide drives */ | |
16341 | + blk_dev_info[gd->major].devnode_name_index = i; | |
16342 | + rc = create_logical_disk(disk_list, gd, i); | |
16343 | + } | |
16344 | + if (rc) | |
16345 | + break; | |
16346 | + } | |
16347 | + return (rc); | |
16348 | +} | |
16349 | + | |
16350 | +static int | |
16351 | +create_logical_scsi_disks(struct evms_logical_node **disk_list, | |
16352 | + struct gendisk *gd) | |
16353 | +{ | |
16354 | + int rc = 0, i; | |
16355 | + Scsi_Disk *SDisks; | |
16356 | + Scsi_Device *SDev; | |
16357 | + | |
16358 | + /* This is an SCSI device */ | |
16359 | + LOG_DEBUG("found SCSI major : %i - searching for disks\n", gd->major); | |
16360 | + LOG_DEBUG("scsi: major name = %s\n", gd->major_name); | |
16361 | + LOG_DEBUG("scsi: number of real devices = %i\n", gd->nr_real); | |
16362 | + SDisks = gd->real_devices; /* SCSI internal data */ | |
16363 | + for (i = 0; i < gd->nr_real; i++) { | |
16364 | + SDev = SDisks[i].device; | |
16365 | + LOG_DEBUG | |
16366 | + ("scsi: Channel = %i, Id = %i, Lun = %i, Capacity = %i\n", | |
16367 | + SDev->channel, SDev->id, SDev->lun, SDisks[i].capacity); | |
16368 | + rc = create_logical_disk(disk_list, gd, i); | |
16369 | + if (rc) | |
16370 | + break; | |
16371 | + } | |
16372 | + return (rc); | |
16373 | +} | |
16374 | + | |
16375 | +static int | |
16376 | +create_logical_disks(struct gendisk *gd, void *p_disk_list) | |
16377 | +{ | |
16378 | + int rc = 0; | |
16379 | + struct evms_logical_node **disk_list = p_disk_list; | |
16380 | + | |
16381 | + /* create logical disks from all IDE & SCSI devices */ | |
16382 | + switch (blk_dev_info[gd->major].device_type) { | |
16383 | + case TYPE_IDE: | |
16384 | + rc = create_logical_ide_disks(disk_list, gd); | |
16385 | + break; | |
16386 | + case TYPE_SCSI: | |
16387 | + rc = create_logical_scsi_disks(disk_list, gd); | |
16388 | + break; | |
16389 | + case TYPE_GENERIC: | |
16390 | + rc = create_logical_generic_disks(disk_list, gd); | |
16391 | + break; | |
16392 | + default: | |
16393 | + LOG_DEBUG("unrecognized device major : %i\n", gd->major); | |
16394 | + break; | |
16395 | + } | |
16396 | + | |
16397 | + return (rc); | |
16398 | +} | |
16399 | + | |
16400 | +static int | |
16401 | +discover_disks(struct evms_logical_node **disk_list) | |
16402 | +{ | |
16403 | + int rc = 0; | |
16404 | + | |
16405 | + MOD_INC_USE_COUNT; | |
16406 | + LOG_ENTRY_EXIT("%s Entry\n", __FUNCTION__); | |
16407 | + | |
16408 | + if (blk_dev_info == NULL) { | |
16409 | + /* allocate space for device info array */ | |
16410 | + blk_dev_info = kmalloc(sizeof (struct blk_device_info) | |
16411 | + * (MAX_BLKDEV + 1), GFP_KERNEL); | |
16412 | + if (blk_dev_info) { | |
16413 | + /* initialize device info array */ | |
16414 | + memset(blk_dev_info, 0, | |
16415 | + sizeof (struct blk_device_info) * (MAX_BLKDEV + 1)); | |
16416 | + init_blk_dev_info(blk_dev_info); | |
16417 | + } else { | |
16418 | + rc = -ENOMEM; | |
16419 | + } | |
16420 | + } | |
16421 | + if (!rc) | |
16422 | + /* create logical disks from the raw devices */ | |
16423 | + rc = walk_gendisk(create_logical_disks, disk_list); | |
16424 | + | |
16425 | + /* free blk_dev_info table and null the ptr to it */ | |
16426 | + kfree(blk_dev_info); | |
16427 | + blk_dev_info = NULL; | |
16428 | + | |
16429 | + LOG_ENTRY_EXIT("%s Exit\n", __FUNCTION__); | |
16430 | + MOD_DEC_USE_COUNT; | |
16431 | + return (rc); | |
16432 | +} | |
16433 | + | |
16434 | +/********************************************************/ | |
16435 | +/* Required Plugin Function Table Entry Point: */ | |
16436 | +/* Delete function */ | |
16437 | +/********************************************************/ | |
16438 | + | |
16439 | +static int | |
16440 | +ldev_mgr_delete(struct evms_logical_node *disk) | |
16441 | +{ | |
16442 | + struct ldev_private *ldev_prv; | |
16443 | + | |
16444 | + /* reset any evms volume related info from | |
16445 | + * the device node, because we can't predict | |
16446 | + * how this node will be used in the future. | |
16447 | + */ | |
16448 | + | |
16449 | + /* removed the feature header if its been used | |
16450 | + */ | |
16451 | + if (disk->feature_header) { | |
16452 | + kfree(disk->feature_header); | |
16453 | + disk->feature_header = NULL; | |
16454 | + } | |
16455 | + /* remove the volume_info structure and flag | |
16456 | + * if this has been used directly by an evms | |
16457 | + * feature. | |
16458 | + */ | |
16459 | + evms_cs_deallocate_volume_info(disk); | |
16460 | + /* reset the flags field to the appropriate state | |
16461 | + */ | |
16462 | + disk->flags &= ~EVMS_VOLUME_FLAG; | |
16463 | + | |
16464 | + /* disk nodes only get deleted when: | |
16465 | + * 1) there are no references to the disk node | |
16466 | + * in memory. | |
16467 | + * 2) the device is removable | |
16468 | + * 3) the device reported a media change | |
16469 | + * | |
16470 | + * All three of these conditions must be true | |
16471 | + * before the disk node can be deleted. | |
16472 | + * evms_check_for_device_changes should set | |
16473 | + * and ensure these conditions before issuing | |
16474 | + * deletes. | |
16475 | + * | |
16476 | + * Newly installed removable media will be | |
16477 | + * picked up in this modules discover code. | |
16478 | + * | |
16479 | + * OR disk nodes can will be deleted if the | |
16480 | + * devices they represent go away, for example | |
16481 | + * in the case of a hotunplugged device or a | |
16482 | + * required driver having been unloaded. | |
16483 | + */ | |
16484 | + if (disk->flags & (EVMS_MEDIA_CHANGED | EVMS_DEVICE_UNAVAILABLE)) { | |
16485 | + LOG_DETAILS("deleting '%s'.\n", disk->name); | |
16486 | + | |
16487 | + evms_cs_unregister_device(disk); | |
16488 | + MOD_DEC_USE_COUNT; | |
16489 | + ldev_prv = disk->private; | |
16490 | + ldev_mgr_unlock_device(ldev_prv); | |
16491 | + if (ldev_prv) { | |
16492 | + kfree(ldev_prv); | |
16493 | + } | |
16494 | + evms_cs_deallocate_logical_node(disk); | |
16495 | + } | |
16496 | + return 0; | |
16497 | +} | |
16498 | + | |
16499 | +/********************************************************/ | |
16500 | +/* Required Plugin Function Table Entry Point: */ | |
16501 | +/* Read function */ | |
16502 | +/********************************************************/ | |
16503 | + | |
16504 | +/* | |
16505 | + * function: ldev_mgr_io_error | |
16506 | + * | |
16507 | + * this function was primarily created because the function | |
16508 | + * buffer_IO_error is inline and kgdb doesn't allow breakpoints | |
16509 | + * to be set on inline functions. Since this was an error path | |
16510 | + * and not mainline, I decided to add a trace statement to help | |
16511 | + * report on the failing condition. | |
16512 | + * | |
16513 | + */ | |
16514 | +static void | |
16515 | +ldev_mgr_io_error(struct evms_logical_node *disk, int io_flag, struct buffer_head *bh, int rc) | |
16516 | +{ | |
16517 | + if (rc == -EOVERFLOW) { | |
16518 | + LOG_SERIOUS | |
16519 | + ("attempt to %s beyond boundary("PFU64") on (%s), rsector(%ld).\n", | |
16520 | + (io_flag) ? "WRITE" : "READ", disk->total_vsectors - 1, | |
16521 | + disk->name, bh->b_rsector); | |
16522 | + } else if (rc == -ENXIO) { | |
16523 | + LOG_SERIOUS("attempt to access a non-existent device(%s).\n", | |
16524 | + disk->name); | |
16525 | + } | |
16526 | + bh->b_end_io(bh, 0); | |
16527 | +} | |
16528 | + | |
16529 | +/********************************************************/ | |
16530 | +/* Required Plugin Function Table Entry Point: */ | |
16531 | +/* Read function */ | |
16532 | +/********************************************************/ | |
16533 | + | |
16534 | +static void | |
16535 | +ldev_mgr_read(struct evms_logical_node *disk, struct buffer_head *bh) | |
16536 | +{ | |
16537 | + int rc = 0; | |
16538 | + request_queue_t *q; | |
16539 | + struct ldev_private *ldev_prv; | |
16540 | + | |
16541 | + ldev_prv = disk->private; | |
16542 | + if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= | |
16543 | + disk->total_vsectors) { | |
16544 | + bh->b_rdev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
16545 | + q = blk_get_queue(bh->b_rdev); | |
16546 | + if (q) { | |
16547 | + disk->flags &= ~EVMS_DEVICE_UNAVAILABLE; | |
16548 | + q->make_request_fn(q, READ, bh); | |
16549 | + return; | |
16550 | + } else { | |
16551 | + rc = -ENXIO; | |
16552 | + disk->flags |= EVMS_DEVICE_UNAVAILABLE; | |
16553 | + } | |
16554 | + } else { | |
16555 | + rc = -EOVERFLOW; | |
16556 | + } | |
16557 | + if (rc) { | |
16558 | + ldev_mgr_io_error(disk, READ, bh, rc); | |
16559 | + } | |
16560 | +} | |
16561 | + | |
16562 | +/********************************************************/ | |
16563 | +/* Required Plugin Function Table Entry Point: */ | |
16564 | +/* Write function */ | |
16565 | +/********************************************************/ | |
16566 | + | |
16567 | +static void | |
16568 | +ldev_mgr_write(struct evms_logical_node *disk, struct buffer_head *bh) | |
16569 | +{ | |
16570 | + int rc = 0; | |
16571 | + request_queue_t *q; | |
16572 | + struct ldev_private *ldev_prv; | |
16573 | + | |
16574 | + ldev_prv = disk->private; | |
16575 | + if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= | |
16576 | + disk->total_vsectors) { | |
16577 | + bh->b_rdev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
16578 | + q = blk_get_queue(bh->b_rdev); | |
16579 | + if (q) { | |
16580 | + disk->flags &= ~EVMS_DEVICE_UNAVAILABLE; | |
16581 | + q->make_request_fn(q, WRITE, bh); | |
16582 | + return; | |
16583 | + } else { | |
16584 | + rc = -ENXIO; | |
16585 | + disk->flags |= EVMS_DEVICE_UNAVAILABLE; | |
16586 | + } | |
16587 | + } else { | |
16588 | + rc = -EOVERFLOW; | |
16589 | + } | |
16590 | + if (rc) { | |
16591 | + ldev_mgr_io_error(disk, WRITE, bh, rc); | |
16592 | + } | |
16593 | +} | |
16594 | + | |
16595 | +/********************************************************/ | |
16596 | +/* Required Plugin Function Table Entry Point: */ | |
16597 | +/* Init_io function & Support routines */ | |
16598 | +/********************************************************/ | |
16599 | + | |
16600 | +/* | |
16601 | + * function: allocate_bh | |
16602 | + * | |
16603 | + * This function obtains a buffer head from the private | |
16604 | + * buffer head pool (pre-allocated at EVMS initial | |
16605 | + * discovery time). | |
16606 | + * | |
16607 | + * NOTE: All access to the buffer head pool are protected | |
16608 | + * by a private spinlock. | |
16609 | + * | |
16610 | + */ | |
16611 | +static inline struct buffer_head * | |
16612 | +allocate_bh(void) | |
16613 | +{ | |
16614 | + struct buffer_head *bh = | |
16615 | + evms_cs_allocate_from_pool(evms_bh_pool, FALSE); | |
16616 | + if (bh) { | |
16617 | + init_waitqueue_head(&bh->b_wait); | |
16618 | + } | |
16619 | + return (bh); | |
16620 | +} | |
16621 | + | |
16622 | +/* | |
16623 | + * function: deallocate_bh | |
16624 | + * | |
16625 | + * This function returns a buffer head to the private | |
16626 | + * buffer head pool (pre-allocated at EVMS initial | |
16627 | + * discovery time). | |
16628 | + * | |
16629 | + * NOTE: All access to the buffer head pool are protected | |
16630 | + * by a private spinlock. | |
16631 | + * | |
16632 | + */ | |
16633 | +static inline void | |
16634 | +deallocate_bh(struct buffer_head *bh) | |
16635 | +{ | |
16636 | + evms_cs_deallocate_to_pool(evms_bh_pool, bh); | |
16637 | +} | |
16638 | + | |
16639 | +/* this is the buffer head control block structure definition */ | |
16640 | +typedef struct bh_cb_s { | |
16641 | + int rc; | |
16642 | + atomic_t blks_allocated; | |
16643 | + wait_queue_head_t cb_wait; | |
16644 | +} bh_cb_t; | |
16645 | + | |
16646 | +/* | |
16647 | + * function: __wait_on_bh_cb | |
16648 | + * | |
16649 | + * This is a worker function to wait_on_bh_cb. | |
16650 | + * This function waits for a set of private buffer heads | |
16651 | + * associated to the specified buffer head control block | |
16652 | + * to return from I/O completion. On completion of the | |
16653 | + * last buffer head, the calling function is awakened | |
16654 | + * and continues running. | |
16655 | + * | |
16656 | + * This is the worker function to the function wait_on_bh_cb. | |
16657 | + * | |
16658 | + */ | |
16659 | +static void | |
16660 | +__wait_on_bh_cb(bh_cb_t * bh_cb) | |
16661 | +{ | |
16662 | + struct task_struct *tsk = current; | |
16663 | + DECLARE_WAITQUEUE(wait, tsk); | |
16664 | + | |
16665 | + add_wait_queue(&bh_cb->cb_wait, &wait); | |
16666 | + do { | |
16667 | + run_task_queue(&tq_disk); | |
16668 | + set_task_state(tsk, TASK_UNINTERRUPTIBLE); | |
16669 | + if (!atomic_read(&bh_cb->blks_allocated)) | |
16670 | + break; | |
16671 | + schedule(); | |
16672 | + } while (atomic_read(&bh_cb->blks_allocated)); | |
16673 | +#ifdef O1_SCHEDULER | |
16674 | + set_task_state(tsk, TASK_RUNNING); | |
16675 | +#else | |
16676 | + tsk->state = TASK_RUNNING; | |
16677 | +#endif | |
16678 | + remove_wait_queue(&bh_cb->cb_wait, &wait); | |
16679 | +} | |
16680 | + | |
16681 | +/* | |
16682 | + * function: wait_on_bh_cb | |
16683 | + * | |
16684 | + * This function waits for a set of private buffer heads | |
16685 | + * associated to the specified buffer head control block | |
16686 | + * to return from I/O completion. On completion of the | |
16687 | + * last buffer head, the calling function is awakened | |
16688 | + * and continues running. | |
16689 | + * | |
16690 | + */ | |
16691 | +static void | |
16692 | +wait_on_bh_cb(bh_cb_t * bh_cb) | |
16693 | +{ | |
16694 | + if (atomic_read(&bh_cb->blks_allocated)) | |
16695 | + __wait_on_bh_cb(bh_cb); | |
16696 | + else | |
16697 | + /* if we ended up with no buffer heads on | |
16698 | + * this pass, lets wait a until a few buffer | |
16699 | + * heads have been freed and try again. This | |
16700 | + * should provide a reasonable delay. | |
16701 | + */ | |
16702 | + schedule(); | |
16703 | +} | |
16704 | + | |
16705 | +/* | |
16706 | + * function: end_bh_cb_io | |
16707 | + * | |
16708 | + * This is the I/O completion function that is called for | |
16709 | + * each private buffer head obtained from the buffer head | |
16710 | + * pool. Control is return thru this routine so we can track | |
16711 | + * all outstanding requests to know when to awaken the caller, | |
16712 | + * and to regain control after all I/Os have been performed. | |
16713 | + * | |
16714 | + */ | |
16715 | +static void | |
16716 | +end_bh_cb_io_sync(struct buffer_head *bh, int uptodate) | |
16717 | +{ | |
16718 | + bh_cb_t *bh_cb = (bh_cb_t *) bh->b_private; | |
16719 | + | |
16720 | + /* record that errors occurred */ | |
16721 | + if (!uptodate) { | |
16722 | + bh_cb->rc = -EIO; | |
16723 | + } | |
16724 | + mark_buffer_uptodate(bh, uptodate); | |
16725 | + unlock_buffer(bh); | |
16726 | + | |
16727 | + deallocate_bh(bh); | |
16728 | + atomic_dec(&bh_cb->blks_allocated); | |
16729 | + if (!atomic_read(&bh_cb->blks_allocated)) | |
16730 | + if (waitqueue_active(&bh_cb->cb_wait)) | |
16731 | + wake_up(&bh_cb->cb_wait); | |
16732 | +} | |
16733 | + | |
16734 | +/* | |
16735 | + * function: ldev_partial_sector_init_io | |
16736 | + * | |
16737 | + * This function is a support function for ldev_init_io, | |
16738 | + * which handles the cases of performing I/O to only a part | |
16739 | + * of non-standard sized hardsector. This function is not | |
16740 | + * designed to be called directly, but via ldev_init_io. | |
16741 | + * | |
16742 | + */ | |
16743 | +static int | |
16744 | +ldev_partial_sector_init_io(struct evms_logical_node *node, | |
16745 | + int io_flag, | |
16746 | + bh_cb_t * bh_cb, | |
16747 | + u64 next_lsn, | |
16748 | + u64 sector_lsn, | |
16749 | + u64 io_size, | |
16750 | + void *bufptr, unsigned char **sector_buf) | |
16751 | +{ | |
16752 | + int rc = 0; | |
16753 | + struct ldev_private *ldev_prv = node->private; | |
16754 | + kdev_t dev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
16755 | + struct buffer_head *bh; | |
16756 | + | |
16757 | + if (*sector_buf == NULL) { | |
16758 | + /* allocate buffer for incoming sector */ | |
16759 | + *sector_buf = kmalloc(node->hardsector_size, GFP_KERNEL); | |
16760 | + if (!*sector_buf) | |
16761 | + return -ENOMEM; | |
16762 | + } | |
16763 | + /* allocate a buffer head from the pool */ | |
16764 | + while ((bh = allocate_bh()) == NULL) | |
16765 | + /* yielding the cpu is playing it | |
16766 | + * safe. it might be wiser to just | |
16767 | + * spin. requires more thought. | |
16768 | + */ | |
16769 | + schedule(); | |
16770 | + | |
16771 | + /* set up the buffer head for this sector */ | |
16772 | + bh->b_end_io = end_bh_cb_io_sync; | |
16773 | + bh->b_size = node->hardsector_size; | |
16774 | + bh->b_rdev = dev; | |
16775 | + bh->b_rsector = next_lsn - sector_lsn; | |
16776 | + bh->b_data = *sector_buf; | |
16777 | + bh->b_page = virt_to_page(*sector_buf); /* this isn't handling the case of a block with more than 1 sector, that spans pages */ | |
16778 | + bh->b_state = 0; | |
16779 | + set_bit(BH_Dirty, &bh->b_state); | |
16780 | + set_bit(BH_Lock, &bh->b_state); | |
16781 | + set_bit(BH_Req, &bh->b_state); | |
16782 | + set_bit(BH_Mapped, &bh->b_state); | |
16783 | + bh->b_private = (void *) bh_cb; | |
16784 | + atomic_inc(&bh_cb->blks_allocated); | |
16785 | + | |
16786 | + /* drive the buffer head down */ | |
16787 | + /* to the device */ | |
16788 | + generic_make_request(READ, bh); | |
16789 | + | |
16790 | + /* wait for all bh's I/O's to end */ | |
16791 | + wait_on_bh_cb(bh_cb); | |
16792 | + | |
16793 | + /* copy data to/from user */ | |
16794 | + if (io_flag != WRITE) | |
16795 | + /* READ */ | |
16796 | + memcpy(bufptr, | |
16797 | + *sector_buf + (sector_lsn << EVMS_VSECTOR_SIZE_SHIFT), | |
16798 | + io_size << EVMS_VSECTOR_SIZE_SHIFT); | |
16799 | + else { | |
16800 | + /* WRITE */ | |
16801 | + memcpy(*sector_buf + (sector_lsn << EVMS_VSECTOR_SIZE_SHIFT), | |
16802 | + bufptr, io_size << EVMS_VSECTOR_SIZE_SHIFT); | |
16803 | + | |
16804 | + /* allocate a buffer head from the pool */ | |
16805 | + while ((bh = allocate_bh()) == NULL) | |
16806 | + /* yielding the cpu is playing it | |
16807 | + * safe. it might be wiser to just | |
16808 | + * spin. requires more thought. | |
16809 | + */ | |
16810 | + schedule(); | |
16811 | + | |
16812 | + /* set up the buffer head for this sector */ | |
16813 | + bh->b_end_io = end_bh_cb_io_sync; | |
16814 | + bh->b_size = node->hardsector_size; | |
16815 | + bh->b_rdev = dev; | |
16816 | + bh->b_rsector = next_lsn - sector_lsn; | |
16817 | + bh->b_data = *sector_buf; | |
16818 | + bh->b_page = virt_to_page(*sector_buf); /* this isn't handling the case of a block with more than 1 sector, that spans pages */ | |
16819 | + bh->b_state = 0; | |
16820 | + set_bit(BH_Dirty, &bh->b_state); | |
16821 | + set_bit(BH_Lock, &bh->b_state); | |
16822 | + set_bit(BH_Req, &bh->b_state); | |
16823 | + set_bit(BH_Mapped, &bh->b_state); | |
16824 | + bh->b_private = (void *) bh_cb; | |
16825 | + atomic_inc(&bh_cb->blks_allocated); | |
16826 | + | |
16827 | + /* drive the buffer head down */ | |
16828 | + /* to the device */ | |
16829 | + generic_make_request(WRITE, bh); | |
16830 | + | |
16831 | + /* wait for all bh's I/O's to end */ | |
16832 | + wait_on_bh_cb(bh_cb); | |
16833 | + } | |
16834 | + return (rc); | |
16835 | +} | |
16836 | + | |
16837 | +/* | |
16838 | + * function: ldev_init_io | |
16839 | + * | |
16840 | + * This function provides support for synchronous I/O | |
16841 | + * operations to the underlying devices. These I/O | |
16842 | + * operations are NOT buffered in any way including the | |
16843 | + * operating system's buffer cache. | |
16844 | + * The request is handled as a leading partial sector, whole sectors, then a trailing partial sector. | |
16845 | + * This function can work with any hardsector size that | |
16846 | + * is a power of 2. | |
16847 | + * | |
16848 | + * node : logical node of the target logical disk | |
16849 | + * io_flag : 0 = read, 1 = write, 2 = read-ahead | |
16850 | + * starting_lsn : the 0-based (disk relative) logical | |
16851 | + * : (512 byte) sector number (lsn) | |
16852 | + * num_lsns : the total number of lsns in this I/O | |
16853 | + * bufptr : address of the memory to read/write the data | |
16854 | + * returns : 0 when no error was seen, otherwise a non-zero error code (-ENXIO, -EINVAL, or the first I/O error) | |
16855 | + */ | |
16856 | +static int | |
16857 | +ldev_init_io(struct evms_logical_node *node, | |
16858 | + int io_flag, | |
16859 | + u64 starting_lsn, u64 num_lsns, void *bufptr) | |
16860 | +{ | |
16861 | + int rc = 0, lsns_per_hardsector, lsns_per_blocksize; | |
16862 | + unchar *sector_buf = NULL, *cur_bufptr; | |
16863 | + u64 next_lsn, remaining_lsns, sector_lsn; | |
16864 | + struct ldev_private *ldev_prv = node->private; | |
16865 | + kdev_t dev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
16866 | + bh_cb_t bh_cb; | |
16867 | + | |
16868 | + LOG_EVERYTHING | |
16869 | + ("%s Entry: Disk(%u,%u), ioflag(%u), start_lsn("PFU64"), num_lsns("PFU64"), bufptr(0x%p)\n", | |
16870 | + __FUNCTION__, ldev_prv->major, ldev_prv->minor, io_flag, | |
16871 | + starting_lsn, num_lsns, bufptr); | |
16872 | + | |
16873 | + /* check for valid device (NOTE(review): presumes blk_size[major] itself is non-NULL -- confirm) */ | |
16874 | + if (!blk_size[ldev_prv->major][ldev_prv->minor]) { | |
16875 | + node->flags |= EVMS_DEVICE_UNAVAILABLE; | |
16876 | + return (-ENXIO); | |
16877 | + } | |
16878 | + /* check for 0 length request */ | |
16879 | + if (num_lsns == 0) { | |
16880 | + LOG_ERROR("%s: error requesting 0 sectors.\n", __FUNCTION__); | |
16881 | + return (-EINVAL); | |
16882 | + } | |
16883 | + /* check for out of bound request (written so that the u64 sum cannot wrap around) */ | |
16884 | + if (num_lsns > node->total_vsectors || starting_lsn > node->total_vsectors - num_lsns) { | |
16885 | + LOG_ERROR | |
16886 | + ("%s: attempted %s beyond logical disk boundary("PFU64" LSNs), requesting LSN("PFU64"), total LSNs("PFU64").\n", | |
16887 | + __FUNCTION__, (io_flag == WRITE) ? "WRITE" : "READ", | |
16888 | + node->total_vsectors, starting_lsn, num_lsns); | |
16889 | + return (-EINVAL); | |
16890 | + } | |
16891 | + /* check for invalid io_flag value */ | |
16892 | + switch (io_flag) { | |
16893 | + case READ: /* read... */ | |
16894 | + case WRITE: /* write... */ | |
16895 | + case READA: /* reada... */ | |
16896 | + break; | |
16897 | + default: | |
16898 | + return (-EINVAL); | |
16899 | + } | |
16900 | + | |
16901 | + /* compute some per device info once up-front */ | |
16902 | + lsns_per_hardsector = node->hardsector_size / EVMS_VSECTOR_SIZE; | |
16903 | + lsns_per_blocksize = node->block_size / EVMS_VSECTOR_SIZE; | |
16904 | + | |
16905 | + /* initialize the buffer head control block */ | |
16906 | + memset(&bh_cb, 0, sizeof (bh_cb_t)); | |
16907 | + init_waitqueue_head(&bh_cb.cb_wait); | |
16908 | + bh_cb.blks_allocated = (atomic_t)ATOMIC_INIT(0); | |
16909 | + | |
16910 | + /* only update the local copy of variables */ | |
16911 | + cur_bufptr = bufptr; | |
16912 | + next_lsn = starting_lsn; | |
16913 | + remaining_lsns = num_lsns; | |
16914 | + | |
16915 | + /* check for a mid-sector starting offset | |
16916 | + * | |
16917 | + * if found, perform I/O on part of that | |
16918 | + * sector | |
16919 | + */ | |
16920 | + sector_lsn = next_lsn & (lsns_per_hardsector - 1); | |
16921 | + if (sector_lsn) { | |
16922 | + u64 io_size; | |
16923 | + | |
16924 | + /* determine how many lsns of this sector the request covers */ | |
16925 | + io_size = lsns_per_hardsector - sector_lsn; | |
16926 | + if (io_size > remaining_lsns) | |
16927 | + io_size = remaining_lsns; | |
16928 | + | |
16929 | + /* perform the partial sector io */ | |
16930 | + rc = ldev_partial_sector_init_io(node, io_flag, &bh_cb, | |
16931 | + next_lsn, | |
16932 | + sector_lsn, io_size, | |
16933 | + cur_bufptr, &sector_buf); | |
16934 | + | |
16935 | + if (!rc) { | |
16936 | + /* update progress in local variables */ | |
16937 | + cur_bufptr += io_size << EVMS_VSECTOR_SIZE_SHIFT; | |
16938 | + next_lsn += io_size; | |
16939 | + remaining_lsns -= io_size; | |
16940 | + } | |
16941 | + } | |
16942 | + | |
16943 | + /* continue if no errors found */ | |
16944 | + if (!rc) { | |
16945 | + /* perform I/O on all the complete sectors | |
16946 | + * in this request. | |
16947 | + * | |
16948 | + * loop until there are no more complete sectors | |
16949 | + * to process. | |
16950 | + */ | |
16951 | + while (remaining_lsns >= lsns_per_hardsector) { | |
16952 | + /* this inner loop attempts to drive as many | |
16953 | + * bytes (in sector size multiples) down to | |
16954 | + * the device as possible using the available | |
16955 | + * buffer heads in the pool. | |
16956 | + */ | |
16957 | + while (remaining_lsns >= lsns_per_hardsector) { | |
16958 | + struct buffer_head *bh; | |
16959 | + | |
16960 | + /* allocate a buffer head from the pool */ | |
16961 | + bh = allocate_bh(); | |
16962 | + if (bh == NULL) | |
16963 | + break; | |
16964 | + | |
16965 | + /* set up the buffer head for this I/O */ | |
16966 | + bh->b_end_io = end_bh_cb_io_sync; | |
16967 | + bh->b_size = | |
16968 | + (remaining_lsns >= lsns_per_blocksize) ? | |
16969 | + node->block_size : node->hardsector_size; | |
16970 | + bh->b_data = cur_bufptr; | |
16971 | + bh->b_rdev = dev; | |
16972 | + bh->b_rsector = next_lsn; | |
16973 | + bh->b_page = virt_to_page(cur_bufptr); /* this isn't handling the case of a block with more than 1 sector, that spans pages */ | |
16974 | + bh->b_state = 0; | |
16975 | + set_bit(BH_Dirty, &bh->b_state); | |
16976 | + set_bit(BH_Lock, &bh->b_state); | |
16977 | + set_bit(BH_Req, &bh->b_state); | |
16978 | + set_bit(BH_Mapped, &bh->b_state); | |
16979 | + bh->b_private = (void *) &bh_cb; | |
16980 | + atomic_inc(&bh_cb.blks_allocated); | |
16981 | + | |
16982 | + /* drive the buffer head down */ | |
16983 | + /* to the device */ | |
16984 | + generic_make_request(io_flag, bh); | |
16985 | + | |
16986 | + /* update progress in local variables */ | |
16987 | + cur_bufptr += bh->b_size; | |
16988 | + next_lsn += | |
16989 | + bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
16990 | + remaining_lsns -= | |
16991 | + bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
16992 | + } | |
16993 | + /* wait for all bh's I/O's to end */ | |
16994 | + wait_on_bh_cb(&bh_cb); | |
16995 | + } | |
16996 | + } | |
16997 | + | |
16998 | + /* continue if no errors found */ | |
16999 | + if (!rc) | |
17000 | + /* check for a mid-sector ending offset | |
17001 | + * | |
17002 | + * if found, perform I/O on part of that | |
17003 | + * sector | |
17004 | + */ | |
17005 | + if (remaining_lsns) | |
17006 | + /* perform the partial sector io */ | |
17007 | + rc = ldev_partial_sector_init_io(node, io_flag, &bh_cb, | |
17008 | + next_lsn, | |
17009 | + 0, remaining_lsns, | |
17010 | + cur_bufptr, | |
17011 | + &sector_buf); | |
17012 | + | |
17013 | + /* free the sector buffer if it was allocated */ | |
17014 | + if (sector_buf) | |
17015 | + kfree(sector_buf); | |
17016 | + | |
17017 | + /* report the first error seen; OR-ing two errno values together would give a meaningless code */ | |
17018 | + if (!rc) rc = bh_cb.rc; | |
17019 | + | |
17020 | + LOG_EVERYTHING("%s Exit: rc(%d)\n", __FUNCTION__, rc); | |
17021 | + | |
17022 | + return (rc); | |
17023 | +} | |
17024 | +/* ldev_mgr_direct_ioctl: copy an evms_plugin_ioctl_pkt in from user space (arg), check that it targets this plugin, locate the disk named by disk_handle and run the requested feature command. The result is returned and also written back in the packet's status field. */ | |
17025 | +static int | |
17026 | +ldev_mgr_direct_ioctl(struct inode *inode, | |
17027 | + struct file *file, unsigned int cmd, unsigned long arg) | |
17028 | +{ | |
17029 | + int rc = 0; | |
17030 | + struct ldev_private *ldev_prv; | |
17031 | + struct evms_plugin_ioctl_pkt tmp, *user_parms; | |
17032 | + struct ldev_plugin_ioctl pi_data; | |
17033 | + struct evms_logical_node *disk; | |
17034 | + | |
17035 | + MOD_INC_USE_COUNT; | |
17036 | + | |
17037 | + user_parms = (struct evms_plugin_ioctl_pkt *) arg; | |
17038 | + /* copy user's parameters to kernel space; on failure tmp holds no valid packet, so leave without writing it back */ | |
17039 | + if (copy_from_user(&tmp, user_parms, sizeof (tmp))) { | |
17040 | + rc = -EFAULT; | |
17041 | + goto out; | |
17042 | + } | |
17043 | + | |
17044 | + /* validate its meant for us */ | |
17045 | + if (tmp.feature_id != plugin_header.id) | |
17046 | + rc = -EINVAL; | |
17047 | + | |
17048 | + if (!rc) { | |
17049 | + /* copy feature ioctl data to kernel space */ | |
17050 | + if (copy_from_user(&pi_data, tmp.feature_ioctl_data, | |
17051 | + sizeof (pi_data))) { | |
17052 | + rc = -EFAULT; | |
17053 | + } | |
17054 | + } | |
17055 | + | |
17056 | + if (!rc) { | |
17057 | + /* find the disk node specified by the disk_handle */ | |
17058 | + int done = FALSE; | |
17059 | + disk = NULL; | |
17060 | + while (!done) { | |
17061 | + rc = evms_cs_find_next_device(disk, &disk); | |
17062 | + if (rc) { | |
17063 | + break; | |
17064 | + } | |
17065 | + if (!disk) { | |
17066 | + rc = -ENODATA; | |
17067 | + break; | |
17068 | + } | |
17069 | + if (disk == | |
17070 | + DEV_HANDLE_TO_NODE(pi_data.disk_handle)) { | |
17071 | + done = TRUE; | |
17072 | + } | |
17073 | + } | |
17074 | + } | |
17075 | + | |
17076 | + if (!rc) { | |
17077 | + /* perform feature command */ | |
17078 | + ldev_prv = (struct ldev_private *) disk->private; | |
17079 | + switch (tmp.feature_command) { | |
17080 | + kdev_t save_dev; | |
17081 | + case LDEV_MGR_BROADCAST_IOCTL_CMD: | |
17082 | + save_dev = inode->i_rdev; | |
17083 | + inode->i_rdev = | |
17084 | + MKDEV(ldev_prv->major, ldev_prv->minor); | |
17085 | + rc = ldev_prv->bdev->bd_op->ioctl(inode, file, | |
17086 | + pi_data.cmd, | |
17087 | + pi_data.arg); | |
17088 | + inode->i_rdev = save_dev; | |
17089 | + break; | |
17090 | + default: | |
17091 | + rc = -EINVAL; | |
17092 | + break; | |
17093 | + } | |
17094 | + } | |
17095 | + | |
17096 | + /* return status value; a failed write-back is reported unless an earlier error is already pending */ | |
17097 | + tmp.status = rc; | |
17098 | + if (copy_to_user(user_parms, &tmp, sizeof (tmp)) && !rc) | |
17099 | + rc = -EFAULT; | |
17100 | +out: | |
17101 | + MOD_DEC_USE_COUNT; | |
17102 | + return rc; | |
17103 | +} | |
17104 | + | |
17105 | +/********************************************************/ | |
17106 | +/* Required Plugin Function Table Entry Point: */ | |
17107 | +/* IOCTL function & Support routines */ | |
17108 | +/********************************************************/ | |
17109 | +/* ldev_mgr_ioctl: ioctl entry point for a local disk node. While the command runs, inode->i_rdev and inode->i_bdev are pointed at the underlying device; both are restored before returning. Returns -EINVAL when inode or disk is NULL. */ | |
17110 | +static int | |
17111 | +ldev_mgr_ioctl(struct evms_logical_node *disk, | |
17112 | + struct inode *inode, | |
17113 | + struct file *file, unsigned int cmd, unsigned long arg) | |
17114 | +{ | |
17115 | + int rc = 0; | |
17116 | + struct ldev_private *ldev_prv; | |
17117 | + kdev_t save_dev; | |
17118 | + struct block_device *save_bdev; | |
17119 | + | |
17120 | + if (!inode || !disk) | |
17121 | + return -EINVAL; | |
17122 | + ldev_prv = disk->private; /* disk may only be dereferenced after the NULL check above */ | |
17123 | + save_dev = inode->i_rdev; | |
17124 | + inode->i_rdev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
17125 | + save_bdev = inode->i_bdev; | |
17126 | + inode->i_bdev = ldev_prv->bdev; | |
17127 | + /* check device availability */ | |
17128 | + if (!blk_get_queue(MKDEV(ldev_prv->major, ldev_prv->minor))) { | |
17129 | + disk->flags |= EVMS_DEVICE_UNAVAILABLE; | |
17130 | + } | |
17131 | + switch (cmd) { | |
17132 | + case EVMS_QUIESCE_VOLUME: | |
17133 | + case EVMS_PLUGIN_IOCTL: | |
17134 | + break; | |
17135 | + case EVMS_GET_BMAP: | |
17136 | + { | |
17137 | + struct evms_get_bmap_pkt *bmap = | |
17138 | + (struct evms_get_bmap_pkt *) arg; | |
17139 | + bmap->dev = MKDEV(ldev_prv->major, ldev_prv->minor); | |
17140 | + bmap->status = 0; | |
17141 | + } | |
17142 | + break; | |
17143 | + case EVMS_OPEN_VOLUME: | |
17144 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
17145 | + rc = -ENXIO; | |
17146 | + } else { | |
17147 | + rc = ldev_prv->bdev->bd_op->open(inode, file); | |
17148 | + } | |
17149 | + break; | |
17150 | + case EVMS_CLOSE_VOLUME: | |
17151 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
17152 | + rc = -ENXIO; | |
17153 | + } else { | |
17154 | + rc = ldev_prv->bdev->bd_op->release(inode, file); | |
17155 | + } | |
17156 | + break; | |
17157 | + case EVMS_CHECK_MEDIA_CHANGE: | |
17158 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
17159 | + rc = -ENXIO; | |
17160 | + } else { | |
17161 | + /* once we detect that media changed | |
17162 | + * is 'set', don't send any more ioctls | |
17163 | + * down to the device, until the | |
17164 | + * media change has been 'reset' by a | |
17165 | + * revalidate disk ioctl. when already | |
17166 | + * 'set', just return a 1 w/o actually | |
17167 | + * performing another ioctl call to the | |
17168 | + * device. | |
17169 | + */ | |
17170 | + if (ldev_prv->media_changed == TRUE) { | |
17171 | + rc = 1; | |
17172 | + break; | |
17173 | + } | |
17174 | + rc = ldev_prv->bdev->bd_op-> | |
17175 | + check_media_change(MKDEV | |
17176 | + (ldev_prv->major, | |
17177 | + ldev_prv->minor)); | |
17178 | + if (rc == 1) { | |
17179 | + ldev_prv->media_changed = TRUE; | |
17180 | + disk->flags |= EVMS_MEDIA_CHANGED; | |
17181 | + } | |
17182 | + } | |
17183 | + break; | |
17184 | + case EVMS_REVALIDATE_DISK: | |
17185 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
17186 | + rc = -ENXIO; | |
17187 | + } else { | |
17188 | + /* don't actually send this ioctl down | |
17189 | + * to the device, until we know that | |
17190 | + * previous check media change ioctl | |
17191 | + * has occurred. | |
17192 | + * | |
17193 | + * when we do actually send the ioctl | |
17194 | + * down, reset the local media_changed | |
17195 | + * flag. | |
17196 | + */ | |
17197 | + if (ldev_prv->media_changed == FALSE) | |
17198 | + break; | |
17199 | + rc = ldev_prv->bdev->bd_op-> | |
17200 | + revalidate(MKDEV | |
17201 | + (ldev_prv->major, ldev_prv->minor)); | |
17202 | + ldev_prv->media_changed = FALSE; | |
17203 | + } | |
17204 | + break; | |
17205 | + case EVMS_GET_DISK_LIST: | |
17206 | + rc = evms_cs_add_item_to_list((struct evms_list_node **) arg, | |
17207 | + disk); | |
17208 | + if (rc > 0) | |
17209 | + rc = 0; | |
17210 | + break; | |
17211 | + case EVMS_CHECK_DEVICE_STATUS: | |
17212 | + if (arg) { | |
17213 | + int *status = (int *) arg; | |
17214 | + *status |= disk->flags; | |
17215 | + } | |
17216 | + break; | |
17217 | + case EVMS_UPDATE_DEVICE_INFO: | |
17218 | + /* determine hardsector size */ | |
17219 | + disk->hardsector_size = 512; | |
17220 | + if (hardsect_size[ldev_prv->major]) { | |
17221 | + disk->hardsector_size = hardsect_size[ldev_prv->major][ldev_prv->minor]; | |
17222 | + } | |
17223 | + /* save the block size */ | |
17224 | + disk->block_size = 1024; | |
17225 | + if (blksize_size[ldev_prv->major]) { | |
17226 | + disk->block_size = blksize_size[ldev_prv->major][ldev_prv->minor]; | |
17227 | + } | |
17228 | + /* device size in sectors | |
17229 | + * | |
17230 | + * try 64bit size first, if that fails | |
17231 | + * fall back on the 32bit size. | |
17232 | + */ | |
17233 | + /* try 64bit size */ | |
17234 | +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18) | |
17235 | + rc = evms_cs_kernel_ioctl(disk, BLKGETSIZE64, | |
17236 | + (ulong) & disk->total_vsectors); | |
17237 | + if (!rc) { | |
17238 | + /* convert bytes to 512 byte sectors */ | |
17239 | + disk->total_vsectors >>= EVMS_VSECTOR_SIZE_SHIFT; | |
17240 | + } else | |
17241 | +#endif | |
17242 | + { | |
17243 | + /* try 32bit size */ | |
17244 | + ulong dev_size = 0; | |
17245 | + rc = evms_cs_kernel_ioctl(disk, BLKGETSIZE, | |
17246 | + (ulong) & dev_size); | |
17247 | + disk->total_vsectors = dev_size; | |
17248 | + } | |
17249 | + break; | |
17250 | + default: | |
17251 | + if (disk->flags & EVMS_DEVICE_UNAVAILABLE) { | |
17252 | + rc = -ENXIO; | |
17253 | + } else { | |
17254 | + rc = ldev_prv->bdev->bd_op->ioctl(inode, file, cmd, | |
17255 | + arg); | |
17256 | + } | |
17257 | + break; | |
17258 | + } | |
17259 | + inode->i_bdev = save_bdev; /* undo the temporary retargeting of the inode */ | |
17260 | + inode->i_rdev = save_dev; | |
17261 | + | |
17262 | + return (rc); | |
17263 | +} | |
17264 | + | |
17265 | +/********************************************************/ | |
17266 | +/* Required Module Entry Point: */ | |
17267 | +/* ldev_mgr_init */ | |
17268 | +/********************************************************/ | |
17269 | + | |
17270 | +static int __init | |
17271 | +ldev_mgr_init(void) | |
17272 | +{ | |
17273 | + return evms_cs_register_plugin(&plugin_header); /* module load: register this plugin's header; the registration result becomes the init status */ | |
17274 | +} | |
17275 | + | |
17276 | +static void __exit | |
17277 | +ldev_mgr_exit(void) | |
17278 | +{ | |
17279 | + evms_cs_unregister_plugin(&plugin_header); /* module unload: undo the registration done in ldev_mgr_init; any result is discarded */ | |
17280 | +} | |
17281 | + | |
17282 | +module_init(ldev_mgr_init); /* ldev_mgr_init is the module's load-time entry point */ | |
17283 | +module_exit(ldev_mgr_exit); /* ldev_mgr_exit is the module's unload-time entry point */ | |
17284 | +#ifdef MODULE_LICENSE /* only declare a license when the kernel headers provide the macro */ | |
17285 | +MODULE_LICENSE("GPL"); | |
17286 | +#endif | |
17287 | diff -Naur linux-2002-09-30/drivers/evms/lvm_vge.c evms-2002-09-30/drivers/evms/lvm_vge.c | |
17288 | --- linux-2002-09-30/drivers/evms/lvm_vge.c Wed Dec 31 18:00:00 1969 | |
17289 | +++ evms-2002-09-30/drivers/evms/lvm_vge.c Fri Sep 13 16:45:06 2002 | |
17290 | @@ -0,0 +1,3734 @@ | |
17291 | +/* -*- linux-c -*- */ | |
17292 | +/* | |
17293 | + * Copyright (c) International Business Machines Corp., 2000 | |
17294 | + * | |
17295 | + * This program is free software; you can redistribute it and/or modify | |
17296 | + * it under the terms of the GNU General Public License as published by | |
17297 | + * the Free Software Foundation; either version 2 of the License, or | |
17298 | + * (at your option) any later version. | |
17299 | + * | |
17300 | + * This program is distributed in the hope that it will be useful, | |
17301 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17302 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
17303 | + * the GNU General Public License for more details. | |
17304 | + * | |
17305 | + * You should have received a copy of the GNU General Public License | |
17306 | + * along with this program; if not, write to the Free Software | |
17307 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17308 | + */ | |
17309 | +/* | |
17310 | + * linux/drivers/evms/lvm_vge.c | |
17311 | + * | |
17312 | + * EVMS Linux LVM Region Manager | |
17313 | + */ | |
17314 | + | |
17315 | +#define LOG_PREFIX "lvm: " | |
17316 | + | |
17317 | +#include <linux/kernel.h> | |
17318 | +#include <linux/module.h> | |
17319 | +#include <linux/vmalloc.h> | |
17320 | +#include <linux/version.h> | |
17321 | +#include <asm/uaccess.h> | |
17322 | + | |
17323 | +#include <linux/evms/evms.h> | |
17324 | +#include <linux/evms/evms_lvm.h> | |
17325 | + | |
17326 | +/* Plugin API prototypes. */ | |
17327 | +static int lvm_discover(struct evms_logical_node ** evms_node_list); | |
17328 | +static int lvm_discover_end(struct evms_logical_node ** evms_node_list); | |
17329 | +static int lvm_delete_node(struct evms_logical_node * logical_node); | |
17330 | +static void lvm_read(struct evms_logical_node * node, struct buffer_head * bh); | |
17331 | +static void lvm_write(struct evms_logical_node * node, struct buffer_head * bh); | |
17332 | +static int lvm_init_io(struct evms_logical_node * node, | |
17333 | + int io_flag, | |
17334 | + u64 sect_nr, | |
17335 | + u64 num_sects, | |
17336 | + void * buf_addr); | |
17337 | +static int lvm_ioctl(struct evms_logical_node * logical_node, | |
17338 | + struct inode * inode, | |
17339 | + struct file * file, | |
17340 | + unsigned int cmd, | |
17341 | + unsigned long arg); | |
17342 | +static int lvm_direct_ioctl(struct inode * inode, | |
17343 | + struct file * file, | |
17344 | + unsigned int cmd, | |
17345 | + unsigned long args); | |
17346 | + | |
17347 | +static struct snapshot_map_entry * allocate_snapshot_map_entry(u64 org_sector, | |
17348 | + u64 snap_sector); | |
17349 | + | |
17350 | +/* LVM plugin function table: binds each EVMS plugin entry point to the matching lvm_* handler prototyped above. */ | |
17351 | +static struct evms_plugin_fops lvm_fops = { | |
17352 | + .discover = lvm_discover, | |
17353 | + .end_discover = lvm_discover_end, | |
17354 | + .delete = lvm_delete_node, | |
17355 | + .read = lvm_read, | |
17356 | + .write = lvm_write, | |
17357 | + .init_io = lvm_init_io, | |
17358 | + .ioctl = lvm_ioctl, | |
17359 | + .direct_ioctl = lvm_direct_ioctl | |
17360 | +}; | |
17361 | + | |
17362 | +static struct evms_plugin_header lvm_plugin_header = { /* plugin header for the LVM plugin: id, version, required services level and function table */ | |
17363 | + .id = SetPluginID(IBM_OEM_ID, | |
17364 | + EVMS_REGION_MANAGER, | |
17365 | + 0x01), /* id built from the IBM OEM id, the region-manager class and the value 0x01 */ | |
17366 | + .version = { /* version of this plugin, taken from the EVMS_LVM_VERSION_* constants */ | |
17367 | + .major = EVMS_LVM_VERSION_MAJOR, | |
17368 | + .minor = EVMS_LVM_VERSION_MINOR, | |
17369 | + .patchlevel = EVMS_LVM_VERSION_PATCH | |
17370 | + }, | |
17371 | + .required_services_version = { /* 0.5.0 -- presumably the minimum common-services level this plugin needs; confirm against evms.h */ | |
17372 | + .major = 0, | |
17373 | + .minor = 5, | |
17374 | + .patchlevel = 0 | |
17375 | + }, | |
17376 | + .fops = &lvm_fops /* entry points defined in the function table lvm_fops */ | |
17377 | +}; | |
17378 | + | |
17379 | +static struct lvm_volume_group * lvm_group_list = NULL; /* head of the global singly linked list of volume groups (chained through next_group) */ | |
17380 | +static struct proc_dir_entry * lvm_proc = NULL; /* proc directory entry for this plugin; presumably set up elsewhere in the file -- TODO confirm */ | |
17381 | + | |
17382 | + | |
17383 | +/********** Miscellaneous Functions **********/ | |
17384 | + | |
17385 | + | |
17386 | +/** | |
17387 | + * remap sector | |
17388 | + * @node: Node of the LV; node->private is its struct lvm_logical_volume. | |
17389 | + * @org_sector: Logical sector to remap. | |
17390 | + * @size: Size (in sectors) of request to remap. | |
17391 | + * @new_sector: Remapped sector. | |
17392 | + * @new_size: New size (in sectors). | |
17393 | + * @pe_start_sector: Starting sector of PE - needed for snapshotting. | |
17394 | + * @pv_entry: New node for which new_sector is relative. | |
17395 | + * | |
17396 | + * Common function to remap LV lba to PV lba in appropriate PE. This | |
17397 | + * function needs to deal with requests that span PEs and/or stripes. If | |
17398 | + * this occurs, the request will simply be chopped off at the boundary of | |
17399 | + * the first PE/stripe. It is up to the calling function to loop | |
17400 | + * accordingly to finish the full remapping. This function is now partially | |
17401 | + * 64-bit enabled. The striping section contains code that currently cannot | |
17402 | + * eliminate at least one mod operation on 64 bit values. Always returns 0. | |
17403 | + **/ | |
17404 | +static int remap_sector(struct evms_logical_node * node, | |
17405 | + u64 org_sector, | |
17406 | + u64 size, | |
17407 | + u64 * new_sector, | |
17408 | + u64 * new_size, | |
17409 | + u64 * pe_start_sector, | |
17410 | + struct lvm_physical_volume ** pv_entry) | |
17411 | +{ | |
17412 | + struct lvm_logical_volume * volume = node->private; | |
17413 | + struct le_table_entry * le_entry; | |
17414 | + u32 le, offset_in_le; | |
17415 | + | |
17416 | + *new_size = size; /* default: the whole request fits; shortened below if it crosses a boundary */ | |
17417 | + | |
17418 | + if ( volume->stripes > 1 ) { | |
17419 | + /* Volume is striped. Reset the size if the request crosses | |
17420 | + * a stripe boundary. Striping in LVM is not 64-bit enabled. | |
17421 | + */ | |
17422 | + u32 column, columns, sectors_per_column; | |
17423 | + u32 sector_in_column, stripe_in_column, le_in_column; | |
17424 | + u32 offset_in_stripe, stripe_in_le; | |
17425 | + u32 org_sector32 = org_sector; /* deliberately truncates the 64-bit sector to 32 bits for the math below */ | |
17426 | + | |
17427 | + sectors_per_column = volume->stripes * volume->pe_size; | |
17428 | + column = org_sector32 / sectors_per_column; | |
17429 | + sector_in_column = org_sector32 % sectors_per_column; | |
17430 | + stripe_in_column = sector_in_column / volume->stripe_size; | |
17431 | + le_in_column = stripe_in_column % volume->stripes; | |
17432 | + columns = volume->num_le / volume->stripes; | |
17433 | + le = column + (columns * le_in_column); | |
17434 | + | |
17435 | + offset_in_stripe = org_sector32 % volume->stripe_size; | |
17436 | + stripe_in_le = stripe_in_column / volume->stripes; | |
17437 | + offset_in_le = offset_in_stripe + | |
17438 | + stripe_in_le * volume->stripe_size; | |
17439 | + | |
17440 | + if ( offset_in_stripe + size > volume->stripe_size ) { | |
17441 | + *new_size = volume->stripe_size - offset_in_stripe; | |
17442 | + } | |
17443 | + } else { | |
17444 | + /* Linear volume. Just find LE and offset. Reset the size if | |
17445 | + * the request crosses an LE boundary. This path is 64-bit safe. | |
17446 | + */ | |
17447 | + le = org_sector >> volume->pe_size_shift; | |
17448 | + offset_in_le = org_sector & (volume->pe_size - 1); /* mask form of modulo: presumes pe_size is a power of 2 -- confirm */ | |
17449 | + | |
17450 | + if ( offset_in_le + size > volume->pe_size ) { | |
17451 | + *new_size = volume->pe_size - offset_in_le; | |
17452 | + } | |
17453 | + } | |
17454 | + | |
17455 | + le_entry = &volume->le_map[le]; /* NOTE(review): le is not range-checked against volume->num_le here; callers presumably validate org_sector -- confirm */ | |
17456 | + *pe_start_sector = le_entry->pe_sector_offset; | |
17457 | + *new_sector = le_entry->pe_sector_offset + offset_in_le; | |
17458 | + *pv_entry = le_entry->owning_pv; | |
17459 | + | |
17460 | + return 0; | |
17461 | +} | |
17462 | + | |
17463 | +/** | |
17464 | + * add_group_to_list | |
17465 | + * @group: Volume group to link in; its next_group ends up NULL. | |
17466 | + * Append @group at the tail of the global lvm_group_list. Always returns 0. | |
17467 | + **/ | |
17468 | +static int add_group_to_list(struct lvm_volume_group * group) | |
17469 | +{ | |
17470 | + struct lvm_volume_group ** tail = &lvm_group_list; | |
17471 | + | |
17472 | + /* Follow the chain of link pointers until the empty (NULL) one. */ | |
17473 | + while ( *tail ) { | |
17474 | + tail = &(*tail)->next_group; | |
17475 | + } | |
17476 | + | |
17477 | + *tail = group; | |
17478 | + group->next_group = NULL; | |
17479 | + return 0; | |
17480 | +} | |
17481 | + | |
17482 | +/** | |
17483 | + * remove_group_from_list | |
17484 | + * @group: Volume group to unlink; nothing happens if it is not listed. | |
17485 | + * Unlink @group from the global lvm_group_list. Always returns 0. | |
17486 | + **/ | |
17487 | +static int remove_group_from_list(struct lvm_volume_group * group) | |
17488 | +{ | |
17489 | + struct lvm_volume_group ** link = &lvm_group_list; | |
17490 | + | |
17491 | + /* Search for the link pointer that currently refers to the group. */ | |
17492 | + while ( *link && *link != group ) { | |
17493 | + link = &(*link)->next_group; | |
17494 | + } | |
17495 | + if ( *link ) { | |
17496 | + /* Found: bypass the group and detach it from its successor. */ | |
17497 | + *link = group->next_group; | |
17498 | + group->next_group = NULL; | |
17499 | + } | |
17500 | + return 0; | |
17501 | +} | |
17502 | + | |
17503 | +/** | |
17504 | + * find_group_by_uuid | |
17505 | + * @vg_uuid: UUID to look for (UUID_LEN bytes are compared). @group: Result. | |
17506 | + * Returns 0 with *@group set on a match, else -EINVAL with *@group NULL. | |
17507 | + **/ | |
17508 | +static int find_group_by_uuid(u8 * vg_uuid, | |
17509 | + struct lvm_volume_group ** group) | |
17510 | +{ | |
17511 | + struct lvm_volume_group * cur = lvm_group_list; | |
17512 | + | |
17513 | + /* Stop on the first group with a matching UUID, or at the list end. */ | |
17514 | + while ( cur && memcmp(vg_uuid, cur->vg_uuid, UUID_LEN) != 0 ) { | |
17515 | + cur = cur->next_group; | |
17516 | + } | |
17517 | + | |
17518 | + /* cur is NULL at this point exactly when nothing matched. */ | |
17519 | + *group = cur; | |
17520 | + return cur ? 0 : -EINVAL; | |
17521 | +} | |
17522 | + | |
17523 | +/** | |
17524 | + * find_pv_by_number | |
17525 | + * | |
17526 | + * Walk the PV list of @group and return the first PV whose pv_number | |
17527 | + * equals @pv_number, or NULL when the list holds no such PV. | |
17528 | + **/ | |
17529 | +static struct lvm_physical_volume * | |
17530 | +find_pv_by_number(u32 pv_number, | |
17531 | + struct lvm_volume_group * group) | |
17532 | +{ | |
17533 | + struct lvm_physical_volume * cur = group->pv_list; | |
17534 | + | |
17535 | + /* Advance until the numbers agree or the list runs out. */ | |
17536 | + while ( cur && cur->pv_number != pv_number ) { | |
17537 | + cur = cur->next; | |
17538 | + } | |
17539 | + /* NULL here means the walk fell off the end of the list. */ | |
17540 | + return cur; | |
17541 | +} | |
17542 | + | |
17543 | +/** | |
17544 | + * translate_lv_name | |
17545 | + * @lvm_lv_name: LVM-style name to convert (input). | |
17546 | + * @evms_node_name: Buffer of NAME_LEN bytes receiving the EVMS-style name. | |
17547 | + * | |
17548 | + * LVM names volumes after their dev-node, /dev/group_name/volume_name, | |
17549 | + * whereas EVMS exposes the same volume as | |
17550 | + * /dev/evms/lvm/group_name/volume_name. This routine rewrites the name | |
17551 | + * taken from the lv_disk_t into the EVMS form. Returns 0 on success, or | |
17552 | + * -EINVAL when the input does not begin with the expected directory. | |
17553 | + **/ | |
17554 | +static int translate_lv_name(char * lvm_lv_name, char * evms_node_name) | |
17555 | +{ | |
17556 | + char * group_and_volume; | |
17557 | + | |
17558 | + memset(evms_node_name, 0, NAME_LEN); | |
17559 | + | |
17560 | + /* Accept only names in which DEV_DIRECTORY is the leading prefix. */ | |
17561 | + if ( strstr(lvm_lv_name, DEV_DIRECTORY) != lvm_lv_name ) { | |
17562 | + LOG_SERIOUS("Invalid LV name: %s\n", lvm_lv_name); | |
17563 | + return -EINVAL; | |
17564 | + } | |
17565 | + /* Step over the prefix to reach "group_name/volume_name". */ | |
17566 | + group_and_volume = lvm_lv_name + strlen(DEV_DIRECTORY); | |
17567 | + | |
17568 | + /* Start with the EVMS LVM directory, then append as much of the | |
17569 | + * remainder as fits while leaving room for the terminator. | |
17570 | + */ | |
17571 | + strcpy(evms_node_name, LVM_DEV_DIRECTORY); | |
17572 | + strncat(evms_node_name, group_and_volume, NAME_LEN - strlen(evms_node_name) - 1); | |
17573 | + | |
17574 | + return 0; | |
17575 | +} | |
17576 | + | |
17577 | +/** | |
17578 | + * check_pv_for_lv | |
17579 | + * | |
17580 | + * Scan the LE map of every LV in @group. Returns -EINVAL as soon as an LE | |
17581 | + * owned by @pv_entry is found, or 0 when no LE refers to that PV. | |
17582 | + **/ | |
17583 | +static int check_pv_for_lv(struct lvm_physical_volume * pv_entry, | |
17584 | + struct lvm_volume_group * group) | |
17585 | +{ | |
17586 | + struct lvm_logical_volume * lv; | |
17587 | + int lv_idx, le_idx; | |
17588 | + | |
17589 | + for ( lv_idx = 1; lv_idx <= MAX_LV; lv_idx++ ) { | |
17590 | + lv = group->volume_list[lv_idx]; | |
17591 | + if ( !lv ) { | |
17592 | + continue; /* Empty slot in the volume table. */ | |
17593 | + } | |
17594 | + for ( le_idx = 0; le_idx < lv->num_le; le_idx++ ) { | |
17595 | + if ( lv->le_map[le_idx].owning_pv == pv_entry ) return -EINVAL; | |
17596 | + } | |
17597 | + } | |
17598 | + return 0; | |
17599 | +} | |
17600 | + | |
17601 | + | |
17602 | +/********** Metadata I/O Functions **********/ | |
17603 | + | |
17604 | + | |
17605 | +/** | |
17606 | + * endian_convert_pv | |
17607 | + * | |
17608 | + * Endian-neutral conversion for PV structures. Every field listed below is rewritten in place from little-endian to CPU byte order. | |
17609 | + **/ | |
17610 | +static inline void endian_convert_pv(struct pv_disk * pv) | |
17611 | +{ | |
17612 | + pv->version = le16_to_cpup(&pv->version); /* the only 16-bit field converted here; the rest are 32-bit */ | |
17613 | + pv->pv_on_disk.base = le32_to_cpup(&pv->pv_on_disk.base); | |
17614 | + pv->pv_on_disk.size = le32_to_cpup(&pv->pv_on_disk.size); | |
17615 | + pv->vg_on_disk.base = le32_to_cpup(&pv->vg_on_disk.base); | |
17616 | + pv->vg_on_disk.size = le32_to_cpup(&pv->vg_on_disk.size); | |
17617 | + pv->pv_uuidlist_on_disk.base = | |
17618 | + le32_to_cpup(&pv->pv_uuidlist_on_disk.base); | |
17619 | + pv->pv_uuidlist_on_disk.size = | |
17620 | + le32_to_cpup(&pv->pv_uuidlist_on_disk.size); | |
17621 | + pv->lv_on_disk.base = le32_to_cpup(&pv->lv_on_disk.base); | |
17622 | + pv->lv_on_disk.size = le32_to_cpup(&pv->lv_on_disk.size); | |
17623 | + pv->pe_on_disk.base = le32_to_cpup(&pv->pe_on_disk.base); | |
17624 | + pv->pe_on_disk.size = le32_to_cpup(&pv->pe_on_disk.size); | |
17625 | + pv->pv_major = le32_to_cpup(&pv->pv_major); | |
17626 | + pv->pv_number = le32_to_cpup(&pv->pv_number); | |
17627 | + pv->pv_status = le32_to_cpup(&pv->pv_status); | |
17628 | + pv->pv_allocatable = le32_to_cpup(&pv->pv_allocatable); | |
17629 | + pv->pv_size = le32_to_cpup(&pv->pv_size); | |
17630 | + pv->lv_cur = le32_to_cpup(&pv->lv_cur); | |
17631 | + pv->pe_size = le32_to_cpup(&pv->pe_size); | |
17632 | + pv->pe_total = le32_to_cpup(&pv->pe_total); | |
17633 | + pv->pe_allocated = le32_to_cpup(&pv->pe_allocated); | |
17634 | + pv->pe_start = le32_to_cpup(&pv->pe_start); | |
17635 | +} | |
17636 | + | |
17637 | +/** | |
17638 | + * read_pv | |
17639 | + * | |
17640 | + * Read the PV metadata from @node. When it carries a valid LVM PV signature, | |
17641 | + * *@pv receives a newly allocated copy; otherwise *@pv is NULL and rc != 0. | |
17642 | + **/ | |
17643 | +static int read_pv(struct evms_logical_node * node, struct pv_disk ** pv) | |
17644 | +{ | |
17645 | + struct pv_disk * raw; | |
17646 | + int rc; | |
17647 | + | |
17648 | + *pv = NULL; | |
17649 | + | |
17650 | + /* Scratch buffer sized for the on-disk PV metadata area. */ | |
17651 | + raw = kmalloc(LVM_PV_DISK_SIZE, GFP_NOIO); | |
17652 | + if ( raw == NULL ) { | |
17653 | + LOG_CRITICAL("Error allocating PV metadata buffer for %s\n", | |
17654 | + node->name); | |
17655 | + return -ENOMEM; | |
17656 | + } | |
17657 | + | |
17658 | + /* Pull the PV metadata area off the node. */ | |
17659 | + rc = INIT_IO(node, 0, evms_cs_size_in_vsectors(LVM_PV_DISK_BASE), | |
17660 | + evms_cs_size_in_vsectors(LVM_PV_DISK_SIZE), raw); | |
17661 | + if ( rc != 0 ) { | |
17662 | + LOG_SERIOUS("Error reading PV metadata from %s\n", node->name); | |
17663 | + goto release; | |
17664 | + } | |
17665 | + | |
17666 | + /* On-disk fields are little-endian; bring them into CPU order. */ | |
17667 | + endian_convert_pv(raw); | |
17668 | + | |
17669 | + /* Reject anything lacking the "HM" id, a version of 1 or 2, or a | |
17670 | + * recorded size equal to the size of the node it was read from. | |
17671 | + */ | |
17672 | + if ( raw->id[0] != 'H' || | |
17673 | + raw->id[1] != 'M' || | |
17674 | + (raw->version != 1 && raw->version != 2) || | |
17675 | + raw->pv_size != node->total_vsectors ) { | |
17676 | + LOG_EXTRA("%s is not an LVM PV\n", node->name); | |
17677 | + rc = -EINVAL; | |
17678 | + goto release; | |
17679 | + } | |
17680 | + | |
17681 | + /* Hand the caller its own copy of the converted metadata. */ | |
17682 | + *pv = kmalloc(sizeof(struct pv_disk), GFP_NOIO); | |
17683 | + if ( *pv != NULL ) { | |
17684 | + memcpy(*pv, raw, sizeof(struct pv_disk)); | |
17685 | + } else { | |
17686 | + LOG_CRITICAL("Error allocating new PV for %s\n", node->name); | |
17687 | + rc = -ENOMEM; | |
17688 | + } | |
17689 | + | |
17690 | + /* Every path that got as far as allocating the scratch | |
17691 | + * buffer leaves through here, so it is always freed. | |
17692 | + */ | |
17693 | +release: | |
17694 | + kfree(raw); | |
17695 | + return rc; | |
17696 | +} | |
17697 | + | |
17698 | +/** | |
17699 | + * endian_convert_vg | |
17700 | + * | |
17701 | + * Endian-neutral conversion for VG structures | |
17702 | + **/ | |
17703 | +static inline void endian_convert_vg(struct vg_disk * vg) | |
17704 | +{ | |
17705 | + vg->vg_number = le32_to_cpup(&vg->vg_number); | |
17706 | + vg->vg_access = le32_to_cpup(&vg->vg_access); | |
17707 | + vg->vg_status = le32_to_cpup(&vg->vg_status); | |
17708 | + vg->lv_max = le32_to_cpup(&vg->lv_max); | |
17709 | + vg->lv_cur = le32_to_cpup(&vg->lv_cur); | |
17710 | + vg->lv_open = le32_to_cpup(&vg->lv_open); | |
17711 | + vg->pv_max = le32_to_cpup(&vg->pv_max); | |
17712 | + vg->pv_cur = le32_to_cpup(&vg->pv_cur); | |
17713 | + vg->pv_act = le32_to_cpup(&vg->pv_act); | |
17714 | + vg->dummy = le32_to_cpup(&vg->dummy); | |
17715 | + vg->vgda = le32_to_cpup(&vg->vgda); | |
17716 | + vg->pe_size = le32_to_cpup(&vg->pe_size); | |
17717 | + vg->pe_total = le32_to_cpup(&vg->pe_total); | |
17718 | + vg->pe_allocated = le32_to_cpup(&vg->pe_allocated); | |
17719 | + vg->pvg_total = le32_to_cpup(&vg->pvg_total); | |
17720 | +} | |
17721 | + | |
17722 | +/** | |
17723 | + * read_vg | |
17724 | + * | |
17725 | + * Read in the VG structure from the specified node. Allocate a new | |
17726 | + * struct vg_disk and copy the data. | |
17727 | + **/ | |
17728 | +static int read_vg(struct evms_logical_node * node, | |
17729 | + struct pv_disk * pv, | |
17730 | + struct vg_disk ** vg) | |
17731 | +{ | |
17732 | + struct vg_disk * vg_buffer; | |
17733 | + unsigned long vg_sectors; | |
17734 | + int rc = -ENOMEM; | |
17735 | + | |
17736 | + /* Allocate a buffer to read the VG metadata. */ | |
17737 | + vg_sectors = evms_cs_size_in_vsectors(pv->vg_on_disk.size); | |
17738 | + vg_buffer = kmalloc(vg_sectors << EVMS_VSECTOR_SIZE_SHIFT, GFP_NOIO); | |
17739 | + if (!vg_buffer) { | |
17740 | + LOG_CRITICAL("Error allocating VG metadata buffer for %s\n", | |
17741 | + node->name); | |
17742 | + goto out; | |
17743 | + } | |
17744 | + | |
17745 | + /* Read the VG metadata. */ | |
17746 | + rc = INIT_IO(node, 0, evms_cs_size_in_vsectors(pv->vg_on_disk.base), | |
17747 | + vg_sectors, vg_buffer); | |
17748 | + if (rc) { | |
17749 | + LOG_SERIOUS("Error reading VG metadata from %s\n", node->name); | |
17750 | + goto out_kfree; | |
17751 | + } | |
17752 | + | |
17753 | + /* Endian-neutral conversion of VG metadata. */ | |
17754 | + endian_convert_vg(vg_buffer); | |
17755 | + | |
17756 | + /* Allocate a new struct vg_disk. */ | |
17757 | + *vg = kmalloc(sizeof(struct vg_disk), GFP_NOIO); | |
17758 | + if (!*vg) { | |
17759 | + LOG_CRITICAL("Error allocating new VG for %s\n", node->name); | |
17760 | + rc = -ENOMEM; | |
17761 | + goto out_kfree; | |
17762 | + } | |
17763 | + | |
17764 | + /* Copy the metadata. */ | |
17765 | + memcpy(*vg, vg_buffer, sizeof(struct vg_disk)); | |
17766 | + | |
17767 | +out_kfree: | |
17768 | + kfree(vg_buffer); | |
17769 | +out: | |
17770 | + return rc; | |
17771 | +} | |
17772 | + | |
17773 | +/** | |
17774 | + * read_uuid_list | |
17775 | + **/ | |
17776 | +static int read_uuid_list(struct evms_logical_node * node, | |
17777 | + struct pv_disk * pv, | |
17778 | + struct lvm_volume_group * group) | |
17779 | +{ | |
17780 | + u64 start_sector; | |
17781 | + unsigned long total_sectors; | |
17782 | + unsigned char * uuid_buffer; | |
17783 | + unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE; | |
17784 | + unsigned long uuid_list_size; | |
17785 | + int i, rc = 0; | |
17786 | + | |
17787 | + if (group->uuid_list) { | |
17788 | + LOG_EXTRA("Already read PV UUIDs for group %s\n", | |
17789 | + group->vg_name); | |
17790 | + goto out; | |
17791 | + } | |
17792 | + | |
17793 | + start_sector = evms_cs_size_in_vsectors(pv->pv_uuidlist_on_disk.base); | |
17794 | + total_sectors = evms_cs_size_in_vsectors(pv->pv_uuidlist_on_disk.size); | |
17795 | + uuid_list_size = round_up(total_sectors * EVMS_VSECTOR_SIZE, | |
17796 | + buffer_size); | |
17797 | + | |
17798 | + /* Allocate a buffer to perform the I/Os. */ | |
17799 | + uuid_buffer = kmalloc(buffer_size, GFP_NOIO); | |
17800 | + if (!uuid_buffer) { | |
17801 | + LOG_CRITICAL("Error allocating buffer for UUID list in group %s\n", | |
17802 | + group->vg_name); | |
17803 | + rc = -ENOMEM; | |
17804 | + goto out; | |
17805 | + } | |
17806 | + | |
17807 | + /* Allocate memory for the UUID array for this group. */ | |
17808 | + group->uuid_list = vmalloc(uuid_list_size); | |
17809 | + if (!group->uuid_list) { | |
17810 | + LOG_CRITICAL("Error allocating UUID list for group %s\n", | |
17811 | + group->vg_name); | |
17812 | + rc = -ENOMEM; | |
17813 | + goto out_kfree; | |
17814 | + } | |
17815 | + memset(group->uuid_list, 0, uuid_list_size); | |
17816 | + | |
17817 | + for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) { | |
17818 | + rc = INIT_IO(node, 0, start_sector + i, | |
17819 | + IO_BUFFER_SECTORS, uuid_buffer); | |
17820 | + if (rc) { | |
17821 | + LOG_SERIOUS("Error reading PV UUID list from %s\n", | |
17822 | + node->name); | |
17823 | + goto out_vfree; | |
17824 | + } | |
17825 | + /* Copy the I/O buffer into the UUID array. */ | |
17826 | + memcpy(&(group->uuid_list[i * EVMS_VSECTOR_SIZE]), | |
17827 | + uuid_buffer, buffer_size); | |
17828 | + } | |
17829 | + | |
17830 | + /* Clear out the unused portion at the end of the uuid_list. */ | |
17831 | + memset(&(group->uuid_list[pv->pv_uuidlist_on_disk.size]), 0, | |
17832 | + uuid_list_size - pv->pv_uuidlist_on_disk.size); | |
17833 | + | |
17834 | +out_kfree: | |
17835 | + kfree(uuid_buffer); | |
17836 | +out: | |
17837 | + return rc; | |
17838 | + | |
17839 | +out_vfree: | |
17840 | + vfree(group->uuid_list); | |
17841 | + group->uuid_list = NULL; | |
17842 | + goto out_kfree; | |
17843 | +} | |
17844 | + | |
17845 | +/** | |
17846 | + * endian_convert_lv | |
17847 | + * | |
17848 | + * Endian-neutral conversion for LV structures | |
17849 | + **/ | |
17850 | +static inline void endian_convert_lv(struct lv_disk * lv) | |
17851 | +{ | |
17852 | + lv->lv_access = le32_to_cpup(&lv->lv_access); | |
17853 | + lv->lv_status = le32_to_cpup(&lv->lv_status); | |
17854 | + lv->lv_open = le32_to_cpup(&lv->lv_open); | |
17855 | + lv->lv_dev = le32_to_cpup(&lv->lv_dev); | |
17856 | + lv->lv_number = le32_to_cpup(&lv->lv_number); | |
17857 | + lv->lv_mirror_copies = le32_to_cpup(&lv->lv_mirror_copies); | |
17858 | + lv->lv_recovery = le32_to_cpup(&lv->lv_recovery); | |
17859 | + lv->lv_schedule = le32_to_cpup(&lv->lv_schedule); | |
17860 | + lv->lv_size = le32_to_cpup(&lv->lv_size); | |
17861 | + lv->lv_snapshot_minor = le32_to_cpup(&lv->lv_snapshot_minor); | |
17862 | + lv->lv_chunk_size = le16_to_cpup(&lv->lv_chunk_size); | |
17863 | + lv->dummy = le16_to_cpup(&lv->dummy); | |
17864 | + lv->lv_allocated_le = le32_to_cpup(&lv->lv_allocated_le); | |
17865 | + lv->lv_stripes = le32_to_cpup(&lv->lv_stripes); | |
17866 | + lv->lv_stripesize = le32_to_cpup(&lv->lv_stripesize); | |
17867 | + lv->lv_badblock = le32_to_cpup(&lv->lv_badblock); | |
17868 | + lv->lv_allocation = le32_to_cpup(&lv->lv_allocation); | |
17869 | + lv->lv_io_timeout = le32_to_cpup(&lv->lv_io_timeout); | |
17870 | + lv->lv_read_ahead = le32_to_cpup(&lv->lv_read_ahead); | |
17871 | +} | |
17872 | + | |
17873 | +static inline void endian_convert_lvs(struct lvm_volume_group * group) | |
17874 | +{ | |
17875 | + int i; | |
17876 | + for ( i = 0; i < group->vg->lv_max; i++ ) { | |
17877 | + endian_convert_lv(&(group->lv_array[i])); | |
17878 | + } | |
17879 | +} | |
17880 | + | |
17881 | +/** | |
17882 | + * read_lv | |
17883 | + * | |
17884 | + * Read in the LV structures for the specified group. Do the read from | |
17885 | + * the first PV in the group. If that one fails, keep trying on the | |
17886 | + * remaining PVs until one works. This function will allocate a buffer | |
17887 | + * for the group to read in the structures. | |
17888 | + **/ | |
17889 | +static int read_lv(struct lvm_volume_group * group) | |
17890 | +{ | |
17891 | + struct lvm_physical_volume * pv_entry = group->pv_list; | |
17892 | + unsigned char * lv_buffer = NULL; | |
17893 | + u64 start_sector; | |
17894 | + unsigned long total_sectors, lv_array_size = 0; | |
17895 | + unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE; | |
17896 | + int i, rc = 1; | |
17897 | + | |
17898 | + if (group->lv_array) { | |
17899 | + return 0; | |
17900 | + } | |
17901 | + | |
17902 | + if (!pv_entry) { | |
17903 | + LOG_ERROR("Group %s has no PVs. Cannot read LV structures.\n", | |
17904 | + group->vg_name); | |
17905 | + return -EINVAL; | |
17906 | + } | |
17907 | + | |
17908 | + /* Allocate a buffer to do the actual I/Os. */ | |
17909 | + lv_buffer = kmalloc(buffer_size, GFP_NOIO); | |
17910 | + if (!lv_buffer) { | |
17911 | + LOG_CRITICAL("Error allocating buffer for LV structs for Group %s\n", | |
17912 | + group->vg_name); | |
17913 | + return -ENOMEM; | |
17914 | + } | |
17915 | + | |
17916 | + /* Read in the LV structures 4k at a time. If one PV returns errors, | |
17917 | + * start over with the next PV in the group. | |
17918 | + */ | |
17919 | + while (rc && pv_entry) { | |
17920 | + start_sector = evms_cs_size_in_vsectors(pv_entry->pv->lv_on_disk.base); | |
17921 | + total_sectors = evms_cs_size_in_vsectors(pv_entry->pv->lv_on_disk.size); | |
17922 | + lv_array_size = round_up(total_sectors * EVMS_VSECTOR_SIZE, | |
17923 | + buffer_size); | |
17924 | + | |
17925 | + /* Allocate the buffer for this group to | |
17926 | + * hold the entire LV array. | |
17927 | + */ | |
17928 | + if (group->lv_array) { | |
17929 | + vfree(group->lv_array); | |
17930 | + group->lv_array = NULL; | |
17931 | + } | |
17932 | + group->lv_array = vmalloc(lv_array_size); | |
17933 | + if (!group->lv_array) { | |
17934 | + LOG_CRITICAL("Error allocating lv_array buffer for Group %s\n", | |
17935 | + group->vg_name); | |
17936 | + rc = -ENOMEM; | |
17937 | + goto out_kfree; | |
17938 | + } | |
17939 | + memset(group->lv_array, 0, lv_array_size); | |
17940 | + | |
17941 | + for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) { | |
17942 | + rc = INIT_IO(pv_entry->logical_node, 0, | |
17943 | + start_sector + i, IO_BUFFER_SECTORS, | |
17944 | + lv_buffer); | |
17945 | + if (rc) { | |
17946 | + LOG_SERIOUS("Error reading LV metadata from %s in Group %s\n", | |
17947 | + pv_entry->logical_node->name, | |
17948 | + group->vg_name); | |
17949 | + | |
17950 | + /* Try the next PV if the current one | |
17951 | + * caused any errors. | |
17952 | + */ | |
17953 | + pv_entry = pv_entry->next; | |
17954 | + break; | |
17955 | + } | |
17956 | + /* Copy the I/O buffer into the lv_array. */ | |
17957 | + memcpy(&(((char *)(group->lv_array))[i * EVMS_VSECTOR_SIZE]), | |
17958 | + lv_buffer, buffer_size); | |
17959 | + } | |
17960 | + } | |
17961 | + | |
17962 | + if (rc) { | |
17963 | + LOG_SERIOUS("Unable to read LV metadata from any PV in Group %s\n", | |
17964 | + group->vg_name); | |
17965 | + goto out_vfree; | |
17966 | + } | |
17967 | + | |
17968 | + /* Clear out the unused portion at the end of the lv_array. */ | |
17969 | + memset(&(((char *)(group->lv_array))[pv_entry->pv->lv_on_disk.size]), | |
17970 | + 0, lv_array_size - pv_entry->pv->lv_on_disk.size); | |
17971 | + | |
17972 | + /* Endian-neutral conversion of the LV metadata. */ | |
17973 | + endian_convert_lvs(group); | |
17974 | + | |
17975 | +out_kfree: | |
17976 | + kfree(lv_buffer); | |
17977 | + return rc; | |
17978 | + | |
17979 | +out_vfree: | |
17980 | + vfree(group->lv_array); | |
17981 | + group->lv_array = NULL; | |
17982 | + goto out_kfree; | |
17983 | +} | |
17984 | + | |
17985 | +/** | |
17986 | + * endian_convert_pe_map | |
17987 | + * | |
17988 | + * Endian-neutral conversion for PE structures | |
17989 | + **/ | |
17990 | +static inline void endian_convert_pe_map(struct lvm_physical_volume * pv_entry) | |
17991 | +{ | |
17992 | + int i; | |
17993 | + for ( i = 0; i < pv_entry->pv->pe_total; i++ ) { | |
17994 | + pv_entry->pe_map[i].lv_num = | |
17995 | + le16_to_cpup(&pv_entry->pe_map[i].lv_num); | |
17996 | + pv_entry->pe_map[i].le_num = | |
17997 | + le16_to_cpup(&pv_entry->pe_map[i].le_num); | |
17998 | + } | |
17999 | +} | |
18000 | + | |
18001 | +/** | |
18002 | + * read_pe_map | |
18003 | + * | |
18004 | + * Read in the PE map for the specified PV. This function will allocate a | |
18005 | + * buffer to read in the data. | |
18006 | + **/ | |
18007 | +static int read_pe_map(struct lvm_physical_volume * pv_entry) | |
18008 | +{ | |
18009 | + struct evms_logical_node * node = pv_entry->logical_node; | |
18010 | + struct pv_disk * pv = pv_entry->pv; | |
18011 | + unsigned char * pe_buffer; | |
18012 | + u64 start_sector; | |
18013 | + unsigned long total_sectors, pe_map_size; | |
18014 | + unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE; | |
18015 | + int i, rc = -ENOMEM; | |
18016 | + | |
18017 | + if (pv_entry->pe_map) { | |
18018 | + return 0; | |
18019 | + } | |
18020 | + | |
18021 | + start_sector = evms_cs_size_in_vsectors(pv->pe_on_disk.base); | |
18022 | + total_sectors = evms_cs_size_in_vsectors(pv->pe_total * | |
18023 | + sizeof(struct pe_disk)); | |
18024 | + pe_map_size = round_up(total_sectors * EVMS_VSECTOR_SIZE, buffer_size); | |
18025 | + | |
18026 | + /* Allocate a buffer for performing the I/O. */ | |
18027 | + pe_buffer = kmalloc(buffer_size, GFP_NOIO); | |
18028 | + if (!pe_buffer) { | |
18029 | + LOG_CRITICAL("Error allocating buffer for PE maps for %s\n", | |
18030 | + node->name); | |
18031 | + goto out; | |
18032 | + } | |
18033 | + | |
18034 | + /* Allocate a buffer to hold the PE map for this PV. */ | |
18035 | + pv_entry->pe_map = vmalloc(pe_map_size); | |
18036 | + if (!pv_entry->pe_map) { | |
18037 | + LOG_CRITICAL("Error allocating PE map for %s\n", node->name); | |
18038 | + goto out_kfree; | |
18039 | + } | |
18040 | + memset(pv_entry->pe_map, 0, pe_map_size); | |
18041 | + | |
18042 | + for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) { | |
18043 | + rc = INIT_IO(node, 0, start_sector + i, | |
18044 | + IO_BUFFER_SECTORS, pe_buffer); | |
18045 | + if (rc) { | |
18046 | + LOG_SERIOUS("Error reading PE maps from %s.\n", | |
18047 | + node->name); | |
18048 | + goto out_vfree; | |
18049 | + } | |
18050 | + /* Copy the data to the actual PE map. */ | |
18051 | + memcpy(&(((char *)(pv_entry->pe_map))[i * EVMS_VSECTOR_SIZE]), | |
18052 | + pe_buffer, buffer_size); | |
18053 | + } | |
18054 | + | |
18055 | + /* Clear out the unused portion at the end of the PE map. */ | |
18056 | + memset(&(((char *)(pv_entry->pe_map))[total_sectors * EVMS_VSECTOR_SIZE]), | |
18057 | + 0, pe_map_size - total_sectors * EVMS_VSECTOR_SIZE); | |
18058 | + | |
18059 | + /* Endian-neutral conversion of the PE metadata. */ | |
18060 | + endian_convert_pe_map(pv_entry); | |
18061 | + | |
18062 | +out_kfree: | |
18063 | + kfree(pe_buffer); | |
18064 | +out: | |
18065 | + return rc; | |
18066 | + | |
18067 | +out_vfree: | |
18068 | + vfree(pv_entry->pe_map); | |
18069 | + pv_entry->pe_map = NULL; | |
18070 | + goto out_kfree; | |
18071 | +} | |
18072 | + | |
18073 | + | |
18074 | +/********** Snapshot Manipulation Functions **********/ | |
18075 | + | |
18076 | + | |
18077 | +/** | |
18078 | + * snapshot_check_quiesce_original | |
18079 | + * | |
18080 | + * For this snapshot LV, check that both it and its original are quiesced. | |
18081 | + **/ | |
18082 | +static int | |
18083 | +snapshot_check_quiesce_original(struct lvm_logical_volume * snap_volume) | |
18084 | +{ | |
18085 | + struct lvm_logical_volume * org_volume = snap_volume->snapshot_org; | |
18086 | + | |
18087 | + if ( ! (snap_volume->lv_access & EVMS_LV_QUIESCED) ) { | |
18088 | + return -EINVAL; | |
18089 | + } | |
18090 | + | |
18091 | + if ( org_volume && !(org_volume->lv_access & EVMS_LV_QUIESCED) ) { | |
18092 | + return -EINVAL; | |
18093 | + } | |
18094 | + | |
18095 | + return 0; | |
18096 | +} | |
18097 | + | |
18098 | +/** | |
18099 | + * snapshot_check_quiesce_all | |
18100 | + * | |
18101 | + * Go through the list of all snapshots for an original volume, and make | |
18102 | + * sure everyone is in a quiesced state. | |
18103 | + **/ | |
18104 | +static int snapshot_check_quiesce_all(struct lvm_logical_volume * org_volume) | |
18105 | +{ | |
18106 | + struct lvm_logical_volume * snap; | |
18107 | + | |
18108 | + if ( ! (org_volume->lv_access & EVMS_LV_QUIESCED) ) { | |
18109 | + return -EINVAL; | |
18110 | + } | |
18111 | + | |
18112 | + for ( snap = org_volume->snapshot_next; | |
18113 | + snap; snap = snap->snapshot_next ) { | |
18114 | + if ( ! (snap->lv_access & EVMS_LV_QUIESCED) ) { | |
18115 | + return -EINVAL; | |
18116 | + } | |
18117 | + } | |
18118 | + | |
18119 | + return 0; | |
18120 | +} | |
18121 | + | |
18122 | +/** | |
18123 | + * invalidate_snapshot_volume | |
18124 | + * | |
18125 | + * In the event a snapshot volume becomes full or corrupted, its metadata | |
18126 | + * must be altered in order to prevent it from being used again. Write some | |
18127 | + * invalid data into the first entry of the COW table. If this volume is | |
18128 | + * not fully deleted by the user/engine, this invalid COW entry will be | |
18129 | + * detected by build_snapshot_maps(), and will cause the volume to be | |
18130 | + * deleted before being exported to EVMS during discover. This is obviously | |
18131 | + * a hack, but it is the same hack currently used by LVM. We're just trying | |
18132 | + * to be compatible. :) | |
18133 | + **/ | |
18134 | +static int invalidate_snapshot_volume(struct lvm_logical_volume * snap_volume) | |
18135 | +{ | |
18136 | + struct evms_logical_node tmp_node; | |
18137 | + | |
18138 | + tmp_node.private = snap_volume; | |
18139 | + tmp_node.total_vsectors = snap_volume->lv_size; | |
18140 | + | |
18141 | + if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) { | |
18142 | + LOG_WARNING("Volume %s is not a snapshot. Cannot invalidate\n", | |
18143 | + snap_volume->name); | |
18144 | + return -EINVAL; | |
18145 | + } | |
18146 | + | |
18147 | + LOG_WARNING("Invalidating full/corrupt snapshot %s\n", | |
18148 | + snap_volume->name); | |
18149 | + LOG_WARNING("Run the EVMS administration tools to remove this snapshot.\n"); | |
18150 | + | |
18151 | + if (snap_volume->cow_table) { | |
18152 | + snap_volume->cow_table[0].pv_org_rsector = | |
18153 | + cpu_to_le64(((u64)1)); | |
18154 | + if ( lvm_init_io(&tmp_node, 4, 0, 1, snap_volume->cow_table) ) { | |
18155 | + LOG_SERIOUS("Unable to invalidate snapshot %s\n", | |
18156 | + snap_volume->name); | |
18157 | + } | |
18158 | + } else { | |
18159 | + LOG_SERIOUS("Unable to invalidate snapshot %s\n", | |
18160 | + snap_volume->name); | |
18161 | + } | |
18162 | + | |
18163 | + snap_volume->lv_status &= ~LV_ACTIVE; | |
18164 | + return 0; | |
18165 | +} | |
18166 | + | |
18167 | +/** | |
18168 | + * remove_snapshot_from_chain | |
18169 | + * | |
18170 | + * Remove a snapshot volume from its original's chain of snapshots. This | |
18171 | + * does not delete the snapshot volume. At runtime, we cannot delete | |
18172 | + * volumes at the region-manager level, because EVMS may have this volume | |
18173 | + * exported, and there is no way to notify EVMS of the deletion. It will | |
18174 | + * eventually need to be deleted in the engine, which will then tell the | |
18175 | + * EVMS kernel services to delete the volume in the kernel. | |
18176 | + **/ | |
18177 | +static int remove_snapshot_from_chain(struct lvm_logical_volume * snap_volume) | |
18178 | +{ | |
18179 | + struct lvm_logical_volume * org_volume = snap_volume->snapshot_org; | |
18180 | + struct lvm_logical_volume ** p_volume; | |
18181 | + | |
18182 | + if (org_volume) { | |
18183 | + for ( p_volume = &org_volume->snapshot_next; | |
18184 | + *p_volume; | |
18185 | + p_volume = &(*p_volume)->snapshot_next ) { | |
18186 | + if ( *p_volume == snap_volume ) { | |
18187 | + *p_volume = snap_volume->snapshot_next; | |
18188 | + break; | |
18189 | + } | |
18190 | + } | |
18191 | + } | |
18192 | + | |
18193 | + snap_volume->snapshot_org = NULL; | |
18194 | + snap_volume->snapshot_next = NULL; | |
18195 | + return 0; | |
18196 | +} | |
18197 | + | |
18198 | +/** | |
18199 | + * snapshot_hash | |
18200 | + * | |
18201 | + * The snapshot hash tables are NEVER going to have 4 billion entries, so | |
18202 | + * we can safely cast the org_sector to 32 bits and just mod it by the | |
18203 | + * hash table size. | |
18204 | + **/ | |
18205 | +static u32 snapshot_hash(u64 org_sector, | |
18206 | + struct lvm_logical_volume * snap_volume) | |
18207 | +{ | |
18208 | + return (((u32)org_sector) % snap_volume->hash_table_size); | |
18209 | +} | |
18210 | + | |
18211 | +/** | |
18212 | + * snapshot_search_hash_chain | |
18213 | + * | |
18214 | + * Search the hash chain that is anchored at the specified head pointer. | |
18215 | + * If the sector number is found, the result pointer is set to that entry | |
18216 | + * in the chain, and a 1 is returned. If the sector is not found, the | |
18217 | + * result pointer is set to the previous entry and 0 is returned. If the | |
18218 | + * result pointer is NULL, this means either the list is empty, or the | |
18219 | + * specified sector should become the first list item. | |
18220 | + **/ | |
18221 | +static int snapshot_search_hash_chain(u64 org_sector, | |
18222 | + struct snapshot_map_entry * head, | |
18223 | + struct snapshot_map_entry ** result) | |
18224 | +{ | |
18225 | + struct snapshot_map_entry * curr = head; | |
18226 | + struct snapshot_map_entry * prev = head; | |
18227 | + while ( curr && curr->org_sector < org_sector ) { | |
18228 | + prev = curr; | |
18229 | + curr = curr->next; | |
18230 | + } | |
18231 | + if (!curr) { | |
18232 | + /* Either an empty chain or went off the end of the chain. */ | |
18233 | + *result = prev; | |
18234 | + return 0; | |
18235 | + } else if ( curr->org_sector != org_sector ) { | |
18236 | + *result = curr->prev; | |
18237 | + return 0; | |
18238 | + } else { | |
18239 | + /* Found the desired sector. */ | |
18240 | + *result = curr; | |
18241 | + return 1; | |
18242 | + } | |
18243 | +} | |
18244 | + | |
18245 | +/** | |
18246 | + * insert_snapshot_map_entry | |
18247 | + * | |
18248 | + * Insert a new entry into a snapshot hash chain, immediately following the | |
18249 | + * specified entry. This function should not be used to add an entry into | |
18250 | + * an empty list, or as the first entry in an existing list. For that case, | |
18251 | + * use insert_snapshot_map_entry_at_head(). | |
18252 | + **/ | |
18253 | +static int insert_snapshot_map_entry(struct snapshot_map_entry * entry, | |
18254 | + struct snapshot_map_entry * base) | |
18255 | +{ | |
18256 | + entry->next = base->next; | |
18257 | + entry->prev = base; | |
18258 | + base->next = entry; | |
18259 | + if (entry->next) { | |
18260 | + entry->next->prev = entry; | |
18261 | + } | |
18262 | + return 0; | |
18263 | +} | |
18264 | + | |
18265 | +/** | |
18266 | + * insert_snapshot_map_entry_at_head | |
18267 | + * | |
18268 | + * Insert a new entry into a snapshot chain as the first entry. | |
18269 | + **/ | |
18270 | +static int insert_snapshot_map_entry_at_head(struct snapshot_map_entry * entry, | |
18271 | + struct snapshot_map_entry ** head) | |
18272 | +{ | |
18273 | + entry->next = *head; | |
18274 | + entry->prev = NULL; | |
18275 | + *head = entry; | |
18276 | + if (entry->next) { | |
18277 | + entry->next->prev = entry; | |
18278 | + } | |
18279 | + return 0; | |
18280 | +} | |
18281 | + | |
18282 | +/** | |
18283 | + * add_cow_entry_to_snapshot_map | |
18284 | + * | |
18285 | + * Convert a cow table entry (from the on-disk data) into an appropriate | |
18286 | + * entry for the snapshot map. Insert this new entry into the appropriate | |
18287 | + * map for the specified volume. | |
18288 | + * | |
18289 | + * The cow_entry passed into this function must have already been | |
18290 | + * endian-converted from disk-order to cpu-order. | |
18291 | + **/ | |
18292 | +static int add_cow_entry_to_snapshot_map(struct lv_COW_table_disk * cow_entry, | |
18293 | + struct lvm_logical_volume * volume) | |
18294 | +{ | |
18295 | + struct snapshot_map_entry * new_entry, * target_entry; | |
18296 | + struct snapshot_map_entry ** hash_table, * chain_head; | |
18297 | + u32 hash_value; | |
18298 | + | |
18299 | + if ( cow_entry->pv_org_number == 0 ) { | |
18300 | + return -EINVAL; | |
18301 | + } | |
18302 | + | |
18303 | + new_entry = allocate_snapshot_map_entry(cow_entry->pv_org_rsector, | |
18304 | + cow_entry->pv_snap_rsector); | |
18305 | + if (!new_entry) { | |
18306 | + return -ENOMEM; | |
18307 | + } | |
18308 | + | |
18309 | + new_entry->snap_pv = find_pv_by_number(cow_entry->pv_snap_number, | |
18310 | + volume->group); | |
18311 | + if (!new_entry->snap_pv) { | |
18312 | + kfree(new_entry); | |
18313 | + return -EINVAL; | |
18314 | + } | |
18315 | + | |
18316 | + hash_value = snapshot_hash(new_entry->org_sector, volume); | |
18317 | + hash_table = volume->snapshot_map[cow_entry->pv_org_number]; | |
18318 | + chain_head = hash_table[hash_value]; | |
18319 | + if ( snapshot_search_hash_chain(new_entry->org_sector, | |
18320 | + chain_head, &target_entry) ) { | |
18321 | + /* In general, we should not find this entry in the snapshot | |
18322 | + * map already. However, it could happen on a re-discover, but | |
18323 | + * the build_snapshot_maps function should weed out those cases. | |
18324 | + * In either event, we can simply ignore duplicates. | |
18325 | + */ | |
18326 | + LOG_WARNING("Detected a duplicate snapshot map entry\n"); | |
18327 | + LOG_WARNING("Snap PV "PFU64":"PFU64", Org PV "PFU64":"PFU64"\n", | |
18328 | + cow_entry->pv_snap_number, | |
18329 | + cow_entry->pv_snap_rsector, | |
18330 | + cow_entry->pv_org_number, | |
18331 | + cow_entry->pv_org_rsector); | |
18332 | + kfree(new_entry); | |
18333 | + } else { | |
18334 | + if (target_entry) { | |
18335 | + insert_snapshot_map_entry(new_entry, target_entry); | |
18336 | + } else { | |
18337 | + insert_snapshot_map_entry_at_head(new_entry, | |
18338 | + &hash_table[hash_value]); | |
18339 | + } | |
18340 | + } | |
18341 | + | |
18342 | + return 0; | |
18343 | +} | |
18344 | + | |
18345 | +/** | |
18346 | + * snapshot_remap_sector | |
18347 | + * | |
18348 | + * Perform a sector remap on a snapshot volume. This should be called from | |
18349 | + * the I/O read path, after the LE-to-PE translation has already been | |
18350 | + * performed. First, determine the base sector of the chunk containing the | |
18351 | + * specified sector, and save the remainder. Then, perform a search through | |
18352 | + * the snapshot map for the specified volume. If an match is found, change | |
18353 | + * the PV and sector numbers to the new values. If no match is found, leave | |
18354 | + * the values alone, meaning the read should proceed down the original | |
18355 | + * volume. | |
18356 | + **/ | |
18357 | +static void | |
18358 | +snapshot_remap_sector(struct lvm_logical_volume * snap_volume, | |
18359 | + u64 pe_start_sector, | |
18360 | + u64 * sector, | |
18361 | + struct lvm_physical_volume ** pv_entry) | |
18362 | +{ | |
18363 | + struct snapshot_map_entry ** hash_table; | |
18364 | + struct snapshot_map_entry * chain_head, * result; | |
18365 | + u32 hash_value; | |
18366 | + u64 chunk_sector, remainder; | |
18367 | + | |
18368 | + if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) { | |
18369 | + return; | |
18370 | + } | |
18371 | + | |
18372 | + chunk_sector = ((*sector - pe_start_sector) & | |
18373 | + ((u64)(~(snap_volume->chunk_size - 1)))) + | |
18374 | + pe_start_sector; | |
18375 | + remainder = *sector - chunk_sector; | |
18376 | + hash_value = snapshot_hash(chunk_sector, snap_volume); | |
18377 | + hash_table = snap_volume->snapshot_map[(*pv_entry)->pv_number]; | |
18378 | + chain_head = hash_table[hash_value]; | |
18379 | + | |
18380 | + if ( snapshot_search_hash_chain(chunk_sector, chain_head, &result) ) { | |
18381 | + *pv_entry = result->snap_pv; | |
18382 | + *sector = result->snap_sector + remainder; | |
18383 | + } | |
18384 | +} | |
18385 | + | |
18386 | +/** | |
18387 | + * snapshot_read_write_chunk | |
18388 | + * | |
18389 | + * This function takes care of reading one chunk of data from the | |
18390 | + * original, and writing it to the snapshot. Since the original now has | |
18391 | + * a fixed sized buffer for this data, we may have to loop to get the | |
18392 | + * whole chunk copied. | |
18393 | + **/ | |
18394 | +static int snapshot_read_write_chunk(struct lvm_logical_volume * org_volume, | |
18395 | + struct lvm_physical_volume * org_pv, | |
18396 | + u64 chunk_sector, | |
18397 | + struct lvm_logical_volume * snap_volume, | |
18398 | + struct lvm_physical_volume ** snap_pv, | |
18399 | + u64 * snap_sector) | |
18400 | +{ | |
18401 | + u32 io_size = snap_volume->chunk_size; | |
18402 | + u64 snap_pe_start_sector, size; | |
18403 | + int i, iterations = 1; | |
18404 | + | |
18405 | + if ( org_volume->chunk_size < snap_volume->chunk_size ) { | |
18406 | + iterations = snap_volume->chunk_size / org_volume->chunk_size; | |
18407 | + io_size = org_volume->chunk_size; | |
18408 | + } | |
18409 | + | |
18410 | + remap_sector(snap_volume->volume_node, snap_volume->next_free_chunk, 1, | |
18411 | + snap_sector, &size, &snap_pe_start_sector, snap_pv); | |
18412 | + | |
18413 | + /* Check for an incomplete volume. */ | |
18414 | + if (!*snap_sector || !*snap_pv) { | |
18415 | + invalidate_snapshot_volume(snap_volume); | |
18416 | + return -1; | |
18417 | + } | |
18418 | + | |
18419 | + for ( i = 0; i < iterations; i++ ) { | |
18420 | + | |
18421 | + /* Read the chunk from the original volume. This is a physical | |
18422 | + * read, not logical. Thus, stripe boundary considerations are | |
18423 | + * unnecessary. Also, chunks are always aligned with PEs, so PE | |
18424 | + * boundary considerations are unnecessary. | |
18425 | + */ | |
18426 | + if ( INIT_IO(org_pv->logical_node, 0, | |
18427 | + chunk_sector + i * io_size, io_size, | |
18428 | + org_volume->chunk_data_buffer) ) { | |
18429 | + return 1; | |
18430 | + } | |
18431 | + | |
18432 | + /* Write this chunk to the snapshot volume. This does duplicate | |
18433 | + * the local init_io code, but we need to have the remapped | |
18434 | + * sector later on, so this is slightly more efficient. Snapshot | |
18435 | + * volumes cannot be striped, so there is no need to consider | |
18436 | + * stripe-boundary conditions. And just like the read in the | |
18437 | + * previous line, chunks are always aligned with PEs, so we | |
18438 | + * don't have to consider PE-boundary conditions. | |
18439 | + */ | |
18440 | + if ( INIT_IO((*snap_pv)->logical_node, 1, | |
18441 | + *snap_sector + i * io_size, io_size, | |
18442 | + org_volume->chunk_data_buffer) ) { | |
18443 | + /* An error writing the chunk to the snapshot is the | |
18444 | + * same situation as the snapshot being full. | |
18445 | + */ | |
18446 | + invalidate_snapshot_volume(snap_volume); | |
18447 | + return -1; | |
18448 | + } | |
18449 | + } | |
18450 | + | |
18451 | + return 0; | |
18452 | +} | |
18453 | + | |
18454 | +/** | |
18455 | + * snapshot_copy_data | |
18456 | + * | |
18457 | + * On a write to a snapshotted volume, check whether the specified chunk | |
18458 | + * has already been remapped on snap_volume. If it has not, read the | |
18459 | + * original data from the volume, write the data to the next available | |
18460 | + * chunk on the snapshot, update the COW table, write the COW table to | |
18461 | + * the snapshot, and insert a new entry into the snapshot map. | |
18462 | + * | |
18463 | + * This copies data to a single snapshot while holding its snap_semaphore. | |
18464 | + * The looping over snapshots is left up to lvm_write. Returns -EIO if snapshot_read_write_chunk returns a positive value, -ENOMEM if the new map entry cannot be allocated, and 0 otherwise (also when the snapshot is inactive, full, or gets invalidated after a write failure). | |
18465 | + **/ | |
18466 | +static int snapshot_copy_data(struct lvm_logical_volume * org_volume, | |
18467 | + struct lvm_logical_volume * snap_volume, | |
18468 | + u64 pe_start_sector, | |
18469 | + u64 org_sector, | |
18470 | + struct lvm_physical_volume * org_pv) | |
18471 | +{ | |
18472 | + struct lvm_physical_volume * snap_pv; | |
18473 | + struct snapshot_map_entry ** hash_table, * chain_head; | |
18474 | + struct snapshot_map_entry * target_entry, * new_map_entry; | |
18475 | + u64 chunk_sector, snap_sector; | |
18476 | + u32 hash_value; | |
18477 | + int rc = 0; | |
18478 | + | |
18479 | + /* Lock out this snapshot while we are remapping. */ | |
18480 | + down(&snap_volume->snap_semaphore); | |
18481 | + | |
18482 | + /* Make sure the snapshot has not been deactivated. */ | |
18483 | + if ( ! (snap_volume->lv_status & LV_ACTIVE) ) { | |
18484 | + goto out; | |
18485 | + } | |
18486 | + | |
18487 | + /* Round the sector down to the start of its chunk, relative to the PE | |
18488 | + * start (the mask presumes chunk_size is a power of two), then search the hash table of org_pv to see if that chunk has already been remapped on this snapshot. | |
18489 | + * NOTE(review): if chunk_size is a 32-bit field, the mask is inverted in 32 bits before it is widened, so it also clears bits 32 and up of the PE-relative offset - confirm offsets stay below 2^32. */ | |
18490 | + chunk_sector = ((org_sector - pe_start_sector) & | |
18491 | + ((u64)(~(snap_volume->chunk_size - 1)))) + | |
18492 | + pe_start_sector; | |
18493 | + hash_value = snapshot_hash(chunk_sector, snap_volume); | |
18494 | + hash_table = snap_volume->snapshot_map[org_pv->pv_number]; | |
18495 | + chain_head = hash_table[hash_value]; | |
18496 | + | |
18497 | + if ( snapshot_search_hash_chain(chunk_sector, | |
18498 | + chain_head, &target_entry) ) { | |
18499 | + /* Chunk is already remapped. */ | |
18500 | + goto out; | |
18501 | + } | |
18502 | + | |
18503 | + /* Is there room on the snapshot to remap this chunk? */ | |
18504 | + if ( snap_volume->next_free_chunk >= snap_volume->lv_size ) { | |
18505 | + /* At this point, the snapshot is full. Any further | |
18506 | + * writes to the original will cause the snapshot to | |
18507 | + * become "corrupt" because they can't be remapped. | |
18508 | + * Take this snapshot permanently offline. | |
18509 | + */ | |
18510 | + goto out_invalidate; | |
18511 | + } | |
18512 | + | |
18513 | + rc = snapshot_read_write_chunk(org_volume, org_pv, chunk_sector, | |
18514 | + snap_volume, &snap_pv, &snap_sector); | |
18515 | + if (rc) { | |
18516 | + rc = (rc > 0) ? -EIO : 0; /* only a positive result is reported as an error; a negative one is turned into 0 */ | |
18517 | + goto out; | |
18518 | + } | |
18519 | + | |
18520 | + /* Fill in the appropriate COW table entry and write that | |
18521 | + * metadata sector back to the snapshot volume. Since we are | |
18522 | + * only writing one sector, there are no boundary conditions. | |
18523 | + * Must endian-convert each entry as it is added. | |
18524 | + */ | |
18525 | + snap_volume->cow_table[snap_volume->next_cow_entry].pv_org_number = | |
18526 | + cpu_to_le64((u64)(org_pv->pv_number)); | |
18527 | + snap_volume->cow_table[snap_volume->next_cow_entry].pv_org_rsector = | |
18528 | + cpu_to_le64p(&chunk_sector); | |
18529 | + snap_volume->cow_table[snap_volume->next_cow_entry].pv_snap_number = | |
18530 | + cpu_to_le64((u64)(snap_pv->pv_number)); | |
18531 | + snap_volume->cow_table[snap_volume->next_cow_entry].pv_snap_rsector = | |
18532 | + cpu_to_le64p(&snap_sector); | |
18533 | + | |
18534 | + if ( lvm_init_io(snap_volume->volume_node, 4, | |
18535 | + snap_volume->current_cow_sector, | |
18536 | + 1, snap_volume->cow_table) ) { | |
18537 | + /* The data was written to the snapshot, but | |
18538 | + * writing the metadata failed. | |
18539 | + */ | |
18540 | + goto out_invalidate; | |
18541 | + } | |
18542 | + | |
18543 | + snap_volume->next_cow_entry++; /* when the in-memory sector of COW entries is full, move on to the next sector and write it out zeroed */ | |
18544 | + if ( snap_volume->next_cow_entry >= | |
18545 | + (EVMS_VSECTOR_SIZE / sizeof (struct lv_COW_table_disk)) ) { | |
18546 | + snap_volume->next_cow_entry = 0; | |
18547 | + snap_volume->current_cow_sector++; | |
18548 | + memset(snap_volume->cow_table, 0, EVMS_VSECTOR_SIZE); | |
18549 | + if ( lvm_init_io(snap_volume->volume_node, 4, | |
18550 | + snap_volume->current_cow_sector, | |
18551 | + 1, snap_volume->cow_table) ) { | |
18552 | + /* Can't clear out the next sector of metadata. */ | |
18553 | + goto out_invalidate; | |
18554 | + } | |
18555 | + } | |
18556 | + snap_volume->next_free_chunk += snap_volume->chunk_size; | |
18557 | + | |
18558 | + /* Create a new snapshot map entry and add it in the appropriate | |
18559 | + * place in the map: after target_entry when the chain search supplied one, otherwise at the head of the chain. | |
18560 | + */ | |
18561 | + new_map_entry = allocate_snapshot_map_entry(chunk_sector, snap_sector); | |
18562 | + if (!new_map_entry) { | |
18563 | + rc = -ENOMEM; | |
18564 | + goto out_invalidate; | |
18565 | + } | |
18566 | + new_map_entry->snap_pv = snap_pv; | |
18567 | + if (target_entry) { | |
18568 | + insert_snapshot_map_entry(new_map_entry, target_entry); | |
18569 | + } else { | |
18570 | + insert_snapshot_map_entry_at_head(new_map_entry, | |
18571 | + &(hash_table[hash_value])); | |
18572 | + } | |
18573 | + | |
18574 | +out: | |
18575 | + up(&snap_volume->snap_semaphore); | |
18576 | + return rc; | |
18577 | + | |
18578 | +out_invalidate: | |
18579 | + invalidate_snapshot_volume(snap_volume); | |
18580 | + goto out; /* rc keeps the value it had before the jump: 0, or -ENOMEM from the allocation failure */ | |
18581 | +} | |
18582 | + | |
18583 | +/** | |
18584 | + * get_snapshot_stats: copy next_free_chunk and lv_status of the snapshot volume selected by snap_stats->vg_uuid and snap_stats->lv_number into snap_stats. Returns 0 on success, or 1 if lv_number is outside 1..MAX_LV, the group or volume is not found, or the volume is not a snapshot. | |
18585 | + **/ | |
18586 | +static int get_snapshot_stats(struct lvm_snapshot_stat_ioctl * snap_stats) | |
18587 | +{ | |
18588 | + struct lvm_logical_volume * volume; | |
18589 | + struct lvm_volume_group * group; | |
18590 | + | |
18591 | + /* Make sure the parameters are in range. */ | |
18592 | + if ( snap_stats->lv_number < 1 || snap_stats->lv_number > MAX_LV ) { | |
18593 | + return 1; | |
18594 | + } | |
18595 | + | |
18596 | + /* Make sure the specified group and volume exist, and that | |
18597 | + * this is a snapshot volume. | |
18598 | + */ | |
18599 | + find_group_by_uuid(snap_stats->vg_uuid, &group); | |
18600 | + if ( ! group || | |
18601 | + ! (volume = group->volume_list[snap_stats->lv_number]) || | |
18602 | + ! (volume->lv_access & LV_SNAPSHOT) ) { | |
18603 | + return 1; | |
18604 | + } | |
18605 | + | |
18606 | + /* Return the starting LBA of the next available chunk, plus the volume status flags. */ | |
18607 | + snap_stats->next_free_chunk = volume->next_free_chunk; | |
18608 | + snap_stats->lv_status = volume->lv_status; | |
18609 | + | |
18610 | + return 0; | |
18611 | +} | |
18612 | + | |
18613 | + | |
18614 | +/********** Memory Allocation/Deallocation Functions **********/ | |
18615 | + | |
18616 | + | |
18617 | +/** | |
18618 | + * deallocate_physical_volume | |
18619 | + * | |
18620 | + * Free the memory used by this physical volume. Do not delete the EVMS | |
18621 | + * node in this function, since this could be called during an error | |
18622 | + * path when we want to save the logical node. Releases the pv metadata (kfree), the pe_map (vfree) and finally pv_entry itself, so pv_entry must not be used afterwards. Always returns 0. | |
18623 | + **/ | |
18624 | +static int deallocate_physical_volume(struct lvm_physical_volume * pv_entry) | |
18625 | +{ | |
18626 | + if (pv_entry->pv) { | |
18627 | + kfree(pv_entry->pv); | |
18628 | + pv_entry->pv = NULL; | |
18629 | + } | |
18630 | + | |
18631 | + if (pv_entry->pe_map) { | |
18632 | + vfree(pv_entry->pe_map); /* presumably allocated with vmalloc where the PE map is read - confirm */ | |
18633 | + pv_entry->pe_map = NULL; | |
18634 | + } | |
18635 | + | |
18636 | + kfree(pv_entry); | |
18637 | + return 0; | |
18638 | +} | |
18639 | + | |
18640 | +/** | |
18641 | + * allocate_physical_volume | |
18642 | + * | |
18643 | + * Create a new struct lvm_physical_volume for the specified volume group. | |
18644 | + * Initialize the new PV with the evms node and lvm pv information. Returns the new PV, which keeps the pv pointer, or NULL if the allocation fails. On failure pv has already been released with kfree, so the caller must not touch it again. | |
18645 | + **/ | |
18646 | +static struct lvm_physical_volume * | |
18647 | +allocate_physical_volume(struct evms_logical_node * node, struct pv_disk * pv) | |
18648 | +{ | |
18649 | + struct lvm_physical_volume * new_pv; | |
18650 | + | |
18651 | + new_pv = kmalloc(sizeof(struct lvm_physical_volume), GFP_NOIO); | |
18652 | + if (!new_pv) { | |
18653 | + LOG_CRITICAL("Error allocating physical volume for %s.\n", | |
18654 | + node->name); | |
18655 | + kfree(pv); /* the metadata buffer is consumed even though no PV was created */ | |
18656 | + goto out; | |
18657 | + } | |
18658 | + | |
18659 | + /* Initialize the PV. Everything not set below stays zero. */ | |
18660 | + memset(new_pv, 0, sizeof(struct lvm_physical_volume)); | |
18661 | + new_pv->logical_node = node; | |
18662 | + new_pv->pv = pv; | |
18663 | + new_pv->pv_number = pv->pv_number; | |
18664 | + | |
18665 | +out: | |
18666 | + return new_pv; | |
18667 | +} | |
18668 | + | |
18669 | +/** | |
18670 | + * allocate_snapshot_map_entry | |
18671 | + * | |
18672 | + * Allocate memory for a new entry in the snapshot map and fill in the | |
18673 | + * sector values. The PV pointer is not filled in here, but can easily | |
18674 | + * be found by using the find_pv_by_number function. Returns the new entry, zeroed apart from org_sector and snap_sector, or NULL if the allocation fails. | |
18675 | + **/ | |
18676 | +static struct snapshot_map_entry * allocate_snapshot_map_entry(u64 org_sector, | |
18677 | + u64 snap_sector) | |
18678 | +{ | |
18679 | + struct snapshot_map_entry * new_entry; | |
18680 | + | |
18681 | + new_entry = kmalloc(sizeof(struct snapshot_map_entry), GFP_NOIO); | |
18682 | + if (!new_entry) { | |
18683 | + goto out; | |
18684 | + } | |
18685 | + memset(new_entry, 0, sizeof(struct snapshot_map_entry)); | |
18686 | + new_entry->org_sector = org_sector; | |
18687 | + new_entry->snap_sector = snap_sector; | |
18688 | +out: | |
18689 | + return new_entry; | |
18690 | +} | |
18691 | + | |
18692 | +/** | |
18693 | + * deallocate_snapshot_map | |
18694 | + * | |
18695 | + * This function will delete one hash table, which is part of the whole | |
18696 | + * snapshot remapping structure. Each hash table is an array of pointers | |
18697 | + * to linked lists of struct snapshot_map_entry's. Every chain entry is released with kfree and the table itself with vfree. A NULL table is ignored. Always returns 0. | |
18698 | + **/ | |
18699 | +static int deallocate_snapshot_map(struct snapshot_map_entry ** table, | |
18700 | + u32 table_size) | |
18701 | +{ | |
18702 | + struct snapshot_map_entry * entry, * next; | |
18703 | + int i; | |
18704 | + | |
18705 | + if (table) { | |
18706 | + for ( i = 0; i < table_size; i++ ) { | |
18707 | + for ( entry = table[i]; entry; entry = next ) { | |
18708 | + next = entry->next; /* fetch the link before the entry is freed */ | |
18709 | + kfree(entry); | |
18710 | + } | |
18711 | + } | |
18712 | + vfree(table); | |
18713 | + } | |
18714 | + return 0; | |
18715 | +} | |
18716 | + | |
18717 | +/** | |
18718 | + * deallocate_logical_volume | |
18719 | + * | |
18720 | + * Delete the in-memory representation of a single LVM logical volume, | |
18721 | + * including its PE map and any snapshot data. Do not alter the parent | |
18722 | + * volume group, except to remove this volume from its volume list. Returns -EINVAL, with nothing freed, if the snapshot quiesce check fails; otherwise frees the volume and returns 0. | |
18723 | + **/ | |
18724 | +static int deallocate_logical_volume(struct lvm_logical_volume * volume) | |
18725 | +{ | |
18726 | + struct lvm_volume_group * group = volume->group; | |
18727 | + struct lvm_logical_volume * org_volume, * snap_volume; | |
18728 | + int i; | |
18729 | + | |
18730 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
18731 | + /* This volume is a snapshot. Remove it from the linked | |
18732 | + * list of volumes that are snapshotting the original. | |
18733 | + * First, the original volume must be quiesced. | |
18734 | + */ | |
18735 | + org_volume = volume->snapshot_org; | |
18736 | + | |
18737 | + if ( snapshot_check_quiesce_original(volume) ) { | |
18738 | + return -EINVAL; | |
18739 | + } | |
18740 | + | |
18741 | + remove_snapshot_from_chain(volume); | |
18742 | + | |
18743 | + /* If the snapshot that was just removed was the last/only | |
18744 | + * volume snapshotting the original, then mark the original | |
18745 | + * as no longer being snapshotted. | |
18746 | + */ | |
18747 | + if ( org_volume && !org_volume->snapshot_next ) { | |
18748 | + org_volume->lv_access &= ~LV_SNAPSHOT_ORG; | |
18749 | + } | |
18750 | + } else if ( volume->lv_access & LV_SNAPSHOT_ORG ) { | |
18751 | + /* If this volume is a snapshot original, all of its snapshots | |
18752 | + * must also be deleted. However, those deletions need to be | |
18753 | + * taken care of by the engine. So just check that they have | |
18754 | + * all been quiesced before removing the original. | |
18755 | + */ | |
18756 | + if ( snapshot_check_quiesce_all(volume) ) { | |
18757 | + return -EINVAL; | |
18758 | + } | |
18759 | + | |
18760 | + /* In case there are any snapshots remaining, we must clear out | |
18761 | + * their pointers to this original to prevent errors when those | |
18762 | + * snapshots are accessed or deleted. | |
18763 | + */ | |
18764 | + for ( snap_volume = volume->snapshot_next; | |
18765 | + snap_volume; snap_volume = snap_volume->snapshot_next ) { | |
18766 | + snap_volume->snapshot_org = NULL; | |
18767 | + } | |
18768 | + } | |
18769 | + | |
18770 | + if (volume->name) { /* NOTE(review): if name is an array member this test is always true - confirm against the struct definition */ | |
18771 | + LOG_DEBUG("Deleting volume %s\n", volume->name); | |
18772 | + } | |
18773 | + | |
18774 | + /* Free all the memory. This includes the LE-to-PE map, any snapshot | |
18775 | + * hash tables, the COW table, and chunk data buffer. | |
18776 | + */ | |
18777 | + if (volume->le_map) { | |
18778 | + vfree(volume->le_map); | |
18779 | + volume->le_map = NULL; | |
18780 | + } | |
18781 | + if (volume->snapshot_map) { | |
18782 | + for ( i = 1; i <= group->pv_count; i++ ) { /* NOTE(review): group is dereferenced here without the NULL check that is applied further down; slots 1..pv_count are all passed on, so each must be NULL or a valid table */ | |
18783 | + deallocate_snapshot_map(volume->snapshot_map[i], | |
18784 | + volume->hash_table_size); | |
18785 | + } | |
18786 | + kfree(volume->snapshot_map); | |
18787 | + volume->snapshot_map = NULL; | |
18788 | + } | |
18789 | + if (volume->cow_table) { | |
18790 | + kfree(volume->cow_table); | |
18791 | + volume->cow_table = NULL; | |
18792 | + } | |
18793 | + if (volume->chunk_data_buffer) { | |
18794 | + kfree(volume->chunk_data_buffer); | |
18795 | + volume->chunk_data_buffer = NULL; | |
18796 | + } | |
18797 | + | |
18798 | + /* Remove this volume from the group's list, but only if the slot really points at this volume. */ | |
18799 | + if ( group && group->volume_list[volume->lv_number] == volume ) { | |
18800 | + group->volume_list[volume->lv_number] = NULL; | |
18801 | + group->volume_count--; | |
18802 | + } | |
18803 | + | |
18804 | + kfree(volume); | |
18805 | + return 0; | |
18806 | +} | |
18807 | + | |
18808 | +/** | |
18809 | + * allocate_logical_volume | |
18810 | + * | |
18811 | + * Allocate space for a new LVM logical volume, including space for the | |
18812 | + * LE-to-PE map and any necessary snapshot data. Returns the new volume, or NULL on failure. On a failure after the volume structure exists, the partially built volume is handed to deallocate_logical_volume. | |
18813 | + **/ | |
18814 | +static struct lvm_logical_volume * | |
18815 | +allocate_logical_volume(struct lv_disk * lv, struct lvm_volume_group * group) | |
18816 | +{ | |
18817 | + struct lvm_logical_volume * new_volume; | |
18818 | + u32 table_entries_per_chunk, table_chunks; | |
18819 | + int i; | |
18820 | + | |
18821 | + /* Allocate space for the new logical volume. */ | |
18822 | + new_volume = kmalloc(sizeof(struct lvm_logical_volume), GFP_NOIO); | |
18823 | + if (!new_volume) { | |
18824 | + LOG_CRITICAL("Error allocating new logical volume %s\n", | |
18825 | + lv->lv_name); | |
18826 | + goto out; | |
18827 | + } | |
18828 | + memset(new_volume, 0, sizeof(struct lvm_logical_volume)); | |
18829 | + | |
18830 | + /* Allocate space for the LE to PE mapping table. */ | |
18831 | + new_volume->le_map = vmalloc(lv->lv_allocated_le * | |
18832 | + sizeof(struct le_table_entry)); | |
18833 | + if (!new_volume->le_map) { | |
18834 | + LOG_CRITICAL("Error creating LE map for logical volume %s\n", | |
18835 | + lv->lv_name); | |
18836 | + goto error; | |
18837 | + } | |
18838 | + memset(new_volume->le_map, 0, | |
18839 | + lv->lv_allocated_le * sizeof(struct le_table_entry)); | |
18840 | + | |
18841 | + /* Initialize the rest of the new volume. | |
18842 | + * Need the +1 on lv_number to match the PE Map entries on the PV. | |
18843 | + */ | |
18844 | + new_volume->lv_number = lv->lv_number + 1; | |
18845 | + new_volume->lv_size = lv->lv_size; | |
18846 | + new_volume->lv_access = lv->lv_access | EVMS_LV_NEW | EVMS_LV_QUIESCED; | |
18847 | + new_volume->lv_status = lv->lv_status | LV_ACTIVE; | |
18848 | + new_volume->lv_minor = MINOR(lv->lv_dev); | |
18849 | + new_volume->stripes = lv->lv_stripes; | |
18850 | + new_volume->stripe_size = lv->lv_stripesize; | |
18851 | + new_volume->stripe_size_shift = evms_cs_log2(lv->lv_stripesize); | |
18852 | + new_volume->pe_size = group->vg->pe_size; | |
18853 | + new_volume->pe_size_shift = evms_cs_log2(group->vg->pe_size); | |
18854 | + new_volume->num_le = lv->lv_allocated_le; | |
18855 | + new_volume->group = group; | |
18856 | + /* Different naming scheme for EVMS nodes. */ | |
18857 | + if ( translate_lv_name(lv->lv_name, new_volume->name) ) { | |
18858 | + goto error; | |
18859 | + } | |
18860 | + | |
18861 | + if ( new_volume->lv_access & LV_SNAPSHOT ) { | |
18862 | + /* This volume is a snapshot, initialize the remaining data, | |
18863 | + * and allocate space for the remapping structures, and one | |
18864 | + * sector's worth of COW tables. The first free data chunk starts after the chunks needed to hold one COW entry per snapshot chunk. | |
18865 | + */ | |
18866 | + new_volume->chunk_size = lv->lv_chunk_size; | |
18867 | + new_volume->num_chunks = lv->lv_size / lv->lv_chunk_size; /* NOTE(review): an on-disk lv_chunk_size of 0 would divide by zero here and below */ | |
18868 | + new_volume->snap_org_minor = lv->lv_snapshot_minor; | |
18869 | + new_volume->next_cow_entry = 0; | |
18870 | + new_volume->current_cow_sector = 0; | |
18871 | + table_entries_per_chunk = (new_volume->chunk_size << | |
18872 | + EVMS_VSECTOR_SIZE_SHIFT) / | |
18873 | + sizeof(struct lv_COW_table_disk); | |
18874 | + table_chunks = (new_volume->num_chunks + | |
18875 | + table_entries_per_chunk - 1) / | |
18876 | + table_entries_per_chunk; | |
18877 | + new_volume->next_free_chunk = table_chunks * | |
18878 | + new_volume->chunk_size; | |
18879 | + new_volume->hash_table_size = (lv->lv_size / lv->lv_chunk_size / | |
18880 | + MAX_HASH_CHAIN_ENTRIES) + 1; | |
18881 | + | |
18882 | + new_volume->cow_table = kmalloc(EVMS_VSECTOR_SIZE, GFP_NOIO); | |
18883 | + if (!new_volume->cow_table) { | |
18884 | + LOG_CRITICAL("Error allocating COW table for logical volume %s\n", | |
18885 | + lv->lv_name); | |
18886 | + goto error; | |
18887 | + } | |
18888 | + memset(new_volume->cow_table, 0, EVMS_VSECTOR_SIZE); | |
18889 | + | |
18890 | + new_volume->snapshot_map = kmalloc((group->pv_count + 1) * | |
18891 | + sizeof(struct snapshot_map_entry **), | |
18892 | + GFP_NOIO); | |
18893 | + if (!new_volume->snapshot_map) { | |
18894 | + LOG_CRITICAL("Error allocating snapshot map for logical volume %s\n", | |
18895 | + lv->lv_name); | |
18896 | + goto error; | |
18897 | + } | |
18898 | + | |
18899 | + memset(new_volume->snapshot_map, 0, (group->pv_count + 1) * sizeof(struct snapshot_map_entry **)); /* NULL every slot, not just slot 0: if a vmalloc below fails, the error path walks slots 1..pv_count and must never see an uninitialized pointer */ | |
18900 | + for ( i = 1; i <= group->pv_count; i++ ) { | |
18901 | + new_volume->snapshot_map[i] = | |
18902 | + vmalloc(new_volume->hash_table_size * | |
18903 | + sizeof(struct snapshot_map_entry *)); | |
18904 | + if (!new_volume->snapshot_map[i]) { | |
18905 | + LOG_CRITICAL("Error allocating snapshot sub-map for logical volume %s\n", | |
18906 | + lv->lv_name); | |
18907 | + goto error; | |
18908 | + } | |
18909 | + memset(new_volume->snapshot_map[i], 0, | |
18910 | + new_volume->hash_table_size * | |
18911 | + sizeof(struct snapshot_map_entry *)); | |
18912 | + } | |
18913 | + init_MUTEX(&new_volume->snap_semaphore); | |
18914 | + } else if ( new_volume->lv_access & LV_SNAPSHOT_ORG ) { | |
18915 | + /* This volume is a snapshot original, allocate space to use for | |
18916 | + * copying snapshot chunks. This will now be a fixed size | |
18917 | + * instead of being based on the chunk size of the snapshots. | |
18918 | + */ | |
18919 | + new_volume->chunk_size = CHUNK_DATA_BUFFER_SIZE; | |
18920 | + new_volume->chunk_data_buffer = | |
18921 | + kmalloc(new_volume->chunk_size << | |
18922 | + EVMS_VSECTOR_SIZE_SHIFT, GFP_NOIO); | |
18923 | + if (!new_volume->chunk_data_buffer) { | |
18924 | + LOG_SERIOUS("Error allocating snapshot chunk buffer for logical volume %s\n", | |
18925 | + lv->lv_name); | |
18926 | + goto error; | |
18927 | + } | |
18928 | + memset(new_volume->chunk_data_buffer, 0, | |
18929 | + new_volume->chunk_size << EVMS_VSECTOR_SIZE_SHIFT); | |
18930 | + } | |
18931 | + | |
18932 | +out: | |
18933 | + return new_volume; | |
18934 | +error: | |
18935 | + deallocate_logical_volume(new_volume); | |
18936 | + new_volume = NULL; | |
18937 | + goto out; | |
18938 | +} | |
18939 | + | |
18940 | +/** | |
18941 | + * deallocate_volume_group | |
18942 | + * | |
18943 | + * Delete the entire in-memory representation of an LVM volume group, | |
18944 | + * including all PVs and logical volumes. If this group is on LVM's | |
18945 | + * volume group list, remove it. Unlike deallocate_physical_volume, this also sends DELETE to the logical node of every PV. Always returns 0. | |
18946 | + **/ | |
18947 | +static int deallocate_volume_group(struct lvm_volume_group * group) | |
18948 | +{ | |
18949 | + struct lvm_physical_volume * pv_entry, * next_pv; | |
18950 | + int i; | |
18951 | + | |
18952 | + LOG_DEBUG("Deleting volume group %s\n", group->vg_name); | |
18953 | + | |
18954 | + /* Remove the group from the global list. */ | |
18955 | + remove_group_from_list(group); | |
18956 | + | |
18957 | + /* Delete the LV metadata array. */ | |
18958 | + if (group->lv_array) { | |
18959 | + vfree(group->lv_array); | |
18960 | + group->lv_array = NULL; | |
18961 | + } | |
18962 | + | |
18963 | + /* Delete the PV UUID list. */ | |
18964 | + if (group->uuid_list) { | |
18965 | + vfree(group->uuid_list); | |
18966 | + group->uuid_list = NULL; | |
18967 | + } | |
18968 | + | |
18969 | + /* Delete all logical volumes. */ | |
18970 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
18971 | + if (group->volume_list[i]) { | |
18972 | + deallocate_logical_volume(group->volume_list[i]); /* NOTE(review): the return value is ignored; on -EINVAL the volume is not freed, yet its slot is cleared below and the group is freed at the end */ | |
18973 | + group->volume_list[i] = NULL; | |
18974 | + } | |
18975 | + } | |
18976 | + | |
18977 | + /* Delete all PVs from the group's list. */ | |
18978 | + for ( pv_entry = group->pv_list; pv_entry; pv_entry = next_pv ) { | |
18979 | + next_pv = pv_entry->next; /* saved first because pv_entry is freed at the end of this iteration */ | |
18980 | + if (pv_entry->logical_node) { | |
18981 | + /* Send a delete command down to the segment manager. */ | |
18982 | + LOG_DEBUG("Deleting PV %s from group %s\n", | |
18983 | + pv_entry->logical_node->name, group->vg_name); | |
18984 | + DELETE(pv_entry->logical_node); | |
18985 | + pv_entry->logical_node = NULL; | |
18986 | + } | |
18987 | + deallocate_physical_volume(pv_entry); | |
18988 | + } | |
18989 | + | |
18990 | + /* Delete the VG metadata. */ | |
18991 | + if (group->vg) { | |
18992 | + kfree(group->vg); | |
18993 | + group->vg = NULL; | |
18994 | + } | |
18995 | + | |
18996 | + kfree(group); | |
18997 | + return 0; | |
18998 | +} | |
18999 | + | |
19000 | +/** | |
19001 | + * allocate_volume_group | |
19002 | + * | |
19003 | + * Allocate space for a new LVM volume group and all of its sub-fields. | |
19004 | + * Initialize the appropriate fields. | |
19005 | + * vg parameter should already have an allocated/initialized struct vg_disk. The new group keeps the vg pointer. Returns the new group, or NULL if the allocation fails; in that case vg has already been released with kfree. | |
19006 | + **/ | |
19007 | +static struct lvm_volume_group * allocate_volume_group(struct vg_disk * vg, | |
19008 | + u8 * vg_name) | |
19009 | +{ | |
19010 | + struct lvm_volume_group * new_group; | |
19011 | + | |
19012 | + /* The volume group itself. */ | |
19013 | + new_group = kmalloc(sizeof(struct lvm_volume_group), GFP_NOIO); | |
19014 | + if (!new_group) { | |
19015 | + kfree(vg); | |
19016 | + goto out; | |
19017 | + } | |
19018 | + | |
19019 | + /* Initialize the new group. */ | |
19020 | + memset(new_group, 0, sizeof(struct lvm_volume_group)); | |
19021 | + memcpy(new_group->vg_uuid, vg->vg_uuid, UUID_LEN); | |
19022 | + strncpy(new_group->vg_name, vg_name, NAME_LEN - 1); /* at most NAME_LEN - 1 bytes are copied; presumably vg_name[] holds NAME_LEN bytes, so the memset above keeps it terminated - confirm */ | |
19023 | + new_group->vg = vg; | |
19024 | + /* Default sector and block sizes. */ | |
19025 | + new_group->hard_sect_size = 512; | |
19026 | + new_group->block_size = 1024; | |
19027 | + new_group->flags = EVMS_VG_DIRTY; | |
19028 | + | |
19029 | + LOG_DETAILS("Discovered volume group %s\n", new_group->vg_name); | |
19030 | + | |
19031 | +out: | |
19032 | + return new_group; | |
19033 | +} | |
19034 | + | |
19035 | +/** | |
19036 | + * remove_pv_from_group | |
19037 | + * | |
19038 | + * In the engine, when a PV is removed from a group (on a vgreduce), that | |
19039 | + * same PV must be removed from that group in the kernel. Otherwise, when | |
19040 | + * the rediscover occurs, that PV will still appear in the group, and | |
19041 | + * will cause segfaults when we try to read metadata from it. Returns -EINVAL if the PV is still in use by a volume. Returns 0 once the PV has been unlinked and freed, and also when pv_number is out of range or the group or the PV cannot be found. | |
19042 | + **/ | |
19043 | +static int remove_pv_from_group(int pv_number, unsigned char * vg_uuid) | |
19044 | +{ | |
19045 | + struct lvm_volume_group * group; | |
19046 | + struct lvm_physical_volume * pv_entry; | |
19047 | + struct lvm_physical_volume ** p_pv_entry; | |
19048 | + | |
19049 | + /* Make sure the numbers are in range. */ | |
19050 | + if ( pv_number < 0 || pv_number > MAX_PV ) { | |
19051 | + return 0; | |
19052 | + } | |
19053 | + | |
19054 | + /* Make sure the group exists. */ | |
19055 | + find_group_by_uuid(vg_uuid, &group); | |
19056 | + if (!group) { | |
19057 | + return 0; | |
19058 | + } | |
19059 | + | |
19060 | + /* Make sure the PV is in this group. */ | |
19061 | + pv_entry = find_pv_by_number(pv_number, group); | |
19062 | + if (!pv_entry) { | |
19063 | + LOG_WARNING("Did not find PV %d in group %s\n", | |
19064 | + pv_number, group->vg_name); | |
19065 | + return 0; | |
19066 | + } | |
19067 | + | |
19068 | + /* Make sure the PV is not in use by any volumes. */ | |
19069 | + if ( check_pv_for_lv(pv_entry, group) ) { | |
19070 | + LOG_SERIOUS("PV %d in group %s still contains LVs\n", | |
19071 | + pv_number, group->vg_name); | |
19072 | + return -EINVAL; | |
19073 | + } | |
19074 | + | |
19075 | + /* Take this PV out of the group's list. p_pv_entry walks the link fields, so the head pointer and interior links are handled the same way. */ | |
19076 | + for ( p_pv_entry = &group->pv_list; | |
19077 | + *p_pv_entry; p_pv_entry = &(*p_pv_entry)->next ) { | |
19078 | + if ( *p_pv_entry == pv_entry ) { | |
19079 | + *p_pv_entry = (*p_pv_entry)->next; | |
19080 | + pv_entry->next = NULL; | |
19081 | + break; | |
19082 | + } | |
19083 | + } | |
19084 | + | |
19085 | + group->pv_count--; | |
19086 | + | |
19087 | + /* There is no way that this PV was the last in this group, so the | |
19088 | + * group never needs to be deleted at this point. The only way this | |
19089 | + * group will exist in the kernel is if there are volumes exported from | |
19090 | + * it. If this was the last PV, then those volumes must be on that PV, | |
19091 | + * and it wouldn't be allowed to be removed from the group (above). | |
19092 | + */ | |
19093 | + | |
19094 | + /* Free up the memory for this PV. Just drop the node: deallocate_physical_volume does not delete the logical node. */ | |
19095 | + deallocate_physical_volume(pv_entry); | |
19096 | + | |
19097 | + LOG_DEBUG("PV %d removed from group %s\n", pv_number, group->vg_name); | |
19098 | + return 0; | |
19099 | +} | |
19100 | + | |
19101 | + | |
19102 | +/********** Consistency Checking Functions **********/ | |
19103 | + | |
19104 | + | |
19105 | +/** | |
19106 | + * clear_le_entries_for_missing_pv | |
19107 | + * | |
19108 | + * In the event that a PV turns up missing during a rediscover, we | |
19109 | + * need to erase any LE map entries that might point to it. Walks every volume in group->volume_list[1..MAX_LV] and resets each LE entry owned by pv_entry (owning_pv to NULL, pe_sector_offset to 0). | |
19110 | + **/ | |
19111 | +static void | |
19112 | +clear_le_entries_for_missing_pv(struct lvm_volume_group * group, | |
19113 | + struct lvm_physical_volume * pv_entry) | |
19114 | +{ | |
19115 | + struct lvm_logical_volume * volume; | |
19116 | + int i, j; | |
19117 | + | |
19118 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
19119 | + if (group->volume_list[i]) { | |
19120 | + volume = group->volume_list[i]; | |
19121 | + for ( j = 0; j < volume->num_le; j++ ) { | |
19122 | + if ( volume->le_map[j].owning_pv == pv_entry ) { | |
19123 | + volume->le_map[j].owning_pv = NULL; | |
19124 | + volume->le_map[j].pe_sector_offset = 0; | |
19125 | + } | |
19126 | + } | |
19127 | + } | |
19128 | + } | |
19129 | +} | |
19130 | + | |
19131 | +/** | |
19132 | + * check_volume_groups | |
19133 | + * | |
19134 | + * This function performs some simple consistency checks on every volume | |
19135 | + * group in lvm_group_list. A group that has no PVs is deleted when it also has no volumes. If any metadata | |
19136 | + * structures (PV, PE map or VG) are missing, they are read in from disk. Always returns 0; rc only holds intermediate results. | |
19137 | + **/ | |
19138 | +static int check_volume_groups(void) | |
19139 | +{ | |
19140 | + struct lvm_volume_group * group, * next_group; | |
19141 | + struct lvm_physical_volume * pv_entry, * next_pv; | |
19142 | + int rc = 0; | |
19143 | + | |
19144 | + for ( group = lvm_group_list; group; group = next_group ) { | |
19145 | + next_group = group->next_group; /* saved first because the group may be freed below */ | |
19146 | + | |
19147 | + LOG_DEBUG("Checking Group %s\n", group->vg_name); | |
19148 | + | |
19149 | + /* A group with no PVs gets no further checks. It is | |
19150 | + * deleted only when it also has no volumes. | |
19151 | + */ | |
19152 | + if (!group->pv_count) { | |
19153 | + LOG_WARNING("No PVs found for Group %s.\n", | |
19154 | + group->vg_name); | |
19155 | + if (!group->volume_count) { | |
19156 | + deallocate_volume_group(group); | |
19157 | + } | |
19158 | + continue; | |
19159 | + } | |
19160 | + | |
19161 | + /* Make sure all metadata for the PVs is present. On a | |
19162 | + * rediscover, it may be missing, because we delete it at the | |
19163 | + * end of discovery. If any is missing, read it in from disk. | |
19164 | + * This is only necessary in the kernel. It can't happen in | |
19165 | + * the engine. | |
19166 | + */ | |
19167 | + for ( pv_entry = group->pv_list; | |
19168 | + pv_entry; pv_entry = next_pv ) { | |
19169 | + next_pv = pv_entry->next; /* saved first because remove_pv_from_group may free pv_entry */ | |
19170 | + if (!pv_entry->pv) { | |
19171 | + LOG_DEBUG("Re-reading PV metadata for %s\n", | |
19172 | + pv_entry->logical_node->name); | |
19173 | + rc = read_pv(pv_entry->logical_node, | |
19174 | + &pv_entry->pv); | |
19175 | + if (rc) { | |
19176 | + /* What happens if we can't re-read the | |
19177 | + * PV metadata? This PV must be removed | |
19178 | + * from the group. Need to also clear | |
19179 | + * all LE entries in all LVs that are | |
19180 | + * pointing to this PV before it can be | |
19181 | + * removed from the list. | |
19182 | + */ | |
19183 | + LOG_SERIOUS("PV metadata is missing or cannot be read from %s\n", | |
19184 | + pv_entry->logical_node->name); | |
19185 | + clear_le_entries_for_missing_pv(group, | |
19186 | + pv_entry); | |
19187 | + remove_pv_from_group(pv_entry->pv_number, | |
19188 | + group->vg_uuid); | |
19189 | + continue; | |
19190 | + } | |
19191 | + pv_entry->pv_number = pv_entry->pv->pv_number; | |
19192 | + | |
19193 | + /* Check for a "stale" PV, i.e. one whose re-read | |
19194 | + * pv_number is 0. This case should already be covered, as long as the Engine is | |
19195 | + * calling the PV_REMOVE ioctl when it does a | |
19196 | + * vgreduce or a pvremove. | |
19197 | + * NOTE(review): the result of remove_pv_from_group is ignored, and nothing in this loop deletes the group when its last PV is removed. | |
19198 | + */ | |
19199 | + if (!pv_entry->pv_number) { | |
19200 | + remove_pv_from_group(0, group->vg_uuid); | |
19201 | + continue; | |
19202 | + } | |
19203 | + } | |
19204 | + | |
19205 | + if (!pv_entry->pe_map) { | |
19206 | + LOG_DEBUG("Re-reading PE maps for %s\n", | |
19207 | + pv_entry->logical_node->name); | |
19208 | + rc = read_pe_map(pv_entry); | |
19209 | + if (rc) { | |
19210 | + LOG_WARNING("Error reading PE maps for %s\n", | |
19211 | + pv_entry->logical_node->name); | |
19212 | + LOG_WARNING("Any volumes residing on %s will be incomplete!\n", | |
19213 | + pv_entry->logical_node->name); | |
19214 | + } | |
19215 | + } | |
19216 | + } | |
19217 | + | |
19218 | + /* Make sure the metadata for the VG is present. If it's | |
19219 | + * missing, read it in from the first PV in the VG. pv_count is tested again because the loop above may have removed PVs. | |
19220 | + */ | |
19221 | + if (!group->vg && group->pv_count) { | |
19222 | + LOG_DEBUG("Re-reading VG metadata for Group %s\n", | |
19223 | + group->vg_name); | |
19224 | + pv_entry = group->pv_list; | |
19225 | + rc = read_vg(pv_entry->logical_node, | |
19226 | + pv_entry->pv, &group->vg); | |
19227 | + if (rc) { | |
19228 | + /* What happens if we can't re-read the | |
19229 | + * VG metadata? It's definitely bad | |
19230 | + * news. Should we delete the VG? | |
19231 | + */ | |
19232 | + continue; | |
19233 | + } | |
19234 | + } | |
19235 | + | |
19236 | + /* Display a warning if the number of PVs found for the group | |
19237 | + * doesn't match the number of PVs recorded for the VG. | |
19238 | + */ | |
19239 | + if ( group->vg && group->pv_count != group->vg->pv_cur ) { | |
19240 | + LOG_WARNING("Group %s is incomplete.\n", | |
19241 | + group->vg_name); | |
19242 | + LOG_WARNING(" Only %d of %d PVs found.\n", | |
19243 | + group->pv_count, group->vg->pv_cur); | |
19244 | + LOG_WARNING(" Volumes in this group may be incomplete.\n"); | |
19245 | + } | |
19246 | + } | |
19247 | + | |
19248 | + return 0; | |
19249 | +} | |
19250 | + | |
19251 | +/** | |
19252 | + * check_le_maps | |
19253 | + * | |
19254 | + * Scan volume_list[1..MAX_LV] of the group. A volume with no LE map at all is | |
19255 | + * deallocated; a volume with any LE entry lacking an owning PV or a PE sector offset is marked EVMS_LV_INCOMPLETE. This is safe for | |
19256 | + * re-discovery because only new volumes could have corrupted LE maps. Always returns 0. | |
19257 | + **/ | |
19258 | +static int check_le_maps(struct lvm_volume_group * group) | |
19259 | +{ | |
19260 | + struct lvm_logical_volume * volume; | |
19261 | + int i, j, count; | |
19262 | + | |
19263 | + for ( i = 1; i <= MAX_LV; i++ ) { /* Slot 0 is never visited. */ | |
19264 | + volume = group->volume_list[i]; | |
19265 | + if (!volume) { | |
19266 | + continue; | |
19267 | + } | |
19268 | + | |
19269 | + if (!volume->le_map) { | |
19270 | + /* No point in keeping the volume around if it has | |
19271 | + * no LE map at all. | |
19272 | + */ | |
19273 | + LOG_SERIOUS("Volume %s has no LE map.\n", volume->name); | |
19274 | + deallocate_logical_volume(volume); | |
19275 | + continue; | |
19276 | + } | |
19277 | + | |
19278 | + /* If any entries in the LE map are missing (no owning PV, or a zero | |
19279 | + * PE sector offset), mark this volume as incomplete. | |
19280 | + */ | |
19281 | + for ( j = 0, count = 0; j < volume->num_le; j++ ) { | |
19282 | + if ( !volume->le_map[j].owning_pv || | |
19283 | + !volume->le_map[j].pe_sector_offset) { | |
19284 | + count++; | |
19285 | + } | |
19286 | + } | |
19287 | + if (count) { | |
19288 | + LOG_SERIOUS("Volume %s has incomplete LE map.\n", | |
19289 | + volume->name); | |
19290 | + LOG_SERIOUS(" Missing %d out of %d LEs.\n", | |
19291 | + count, volume->num_le); | |
19292 | + volume->lv_access |= EVMS_LV_INCOMPLETE; | |
19293 | + } | |
19294 | + } | |
19295 | + return 0; | |
19296 | +} | |
19297 | + | |
19298 | +/** | |
19299 | + * check_snapshot_map | |
19300 | + * | |
19301 | + * For snapshot volumes, make sure the snapshot map is intact (a hash table must exist for every index 1..group->pv_count), and that | |
19302 | + * the entries of every hash chain are in strictly increasing org_sector order, which also means there | |
19303 | + * are no duplicate entries. Returns 0 for a non-snapshot or an intact map; otherwise sets EVMS_LV_INVALID and returns -EINVAL. | |
19304 | + **/ | |
19305 | +static int check_snapshot_map(struct lvm_logical_volume * snap_volume) | |
19306 | +{ | |
19307 | + struct snapshot_map_entry ** table, * curr; | |
19308 | + int i, j; | |
19309 | + | |
19310 | + if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) { /* Nothing to check on a non-snapshot. */ | |
19311 | + return 0; | |
19312 | + } | |
19313 | + if (!snap_volume->snapshot_map) { | |
19314 | + snap_volume->lv_access |= EVMS_LV_INVALID; | |
19315 | + return -EINVAL; | |
19316 | + } | |
19317 | + | |
19318 | + for ( i = 1; i <= snap_volume->group->pv_count; i++ ) { /* Map slots are indexed from 1. */ | |
19319 | + if (!snap_volume->snapshot_map[i]) { | |
19320 | + snap_volume->lv_access |= EVMS_LV_INVALID; | |
19321 | + return -EINVAL; | |
19322 | + } | |
19323 | + table = snap_volume->snapshot_map[i]; | |
19324 | + for ( j = 0; j < snap_volume->hash_table_size; j++ ) { | |
19325 | + for ( curr = table[j]; curr; curr = curr->next ) { | |
19326 | + if ( curr->next && /* Equal or descending neighbours are both rejected. */ | |
19327 | + curr->org_sector >= | |
19328 | + curr->next->org_sector) { | |
19329 | + snap_volume->lv_access |= | |
19330 | + EVMS_LV_INVALID; | |
19331 | + return -EINVAL; | |
19332 | + } | |
19333 | + } | |
19334 | + } | |
19335 | + } | |
19336 | + return 0; | |
19337 | +} | |
19338 | + | |
19339 | +/** | |
19340 | + * check_logical_volumes | |
19341 | + * | |
19342 | + * Perform a consistency check on all of the logical volumes in every dirty | |
19343 | + * group. Any volume that has any inconsistencies will be marked as | |
19344 | + * incomplete or invalid, depending on the severity of the problem. Invalid | |
19345 | + * volumes are deleted. While final_discovery is not set, new volumes that | |
19346 | + * are incomplete are deleted as well. Always returns 0. | |
19347 | + **/ | |
19348 | +static int check_logical_volumes(int final_discovery) | |
19349 | +{ | |
19350 | + struct lvm_volume_group * group; | |
19351 | + struct lvm_logical_volume * volume, * snap, * next; | |
19352 | + int count, i, j; | |
19353 | + | |
19354 | + /* Check every valid, dirty volume group. */ | |
19355 | + for ( group = lvm_group_list; group; group = group->next_group ) { | |
19356 | + if ( ! (group->flags & EVMS_VG_DIRTY) ) { | |
19357 | + continue; | |
19358 | + } | |
19359 | + /* Check every valid volume in this group. */ | |
19360 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
19361 | + volume = group->volume_list[i]; | |
19362 | + if (!volume) { | |
19363 | + continue; | |
19364 | + } | |
19365 | + | |
19366 | + LOG_DEBUG("Checking logical volume %s\n", volume->name); | |
19367 | + | |
19368 | + if (!volume->group) { | |
19369 | + volume->group = group; | |
19370 | + } | |
19371 | + | |
19372 | + /* All LE-map entries must have valid values. The I/O | |
19373 | + * paths now detect missing LE entries. | |
19374 | + */ | |
19375 | + if (volume->le_map) { | |
19376 | + for ( j = 0, count = 0; | |
19377 | + j < volume->num_le; j++ ) { | |
19378 | + if ( !volume->le_map[j].owning_pv || | |
19379 | + !volume->le_map[j].pe_sector_offset ) { | |
19380 | + count++; | |
19381 | + } | |
19382 | + } | |
19383 | + if (count) { | |
19384 | + LOG_SERIOUS("Volume %s has incomplete LE map.\n", | |
19385 | + volume->name); | |
19386 | + LOG_SERIOUS(" Missing %d out of %d LEs.\n", | |
19387 | + count, volume->num_le); | |
19388 | + volume->lv_access |= EVMS_LV_INCOMPLETE; | |
19389 | + } else { | |
19390 | + /* In case this volume was previously | |
19391 | + * marked incomplete. | |
19392 | + */ | |
19393 | + volume->lv_access &= | |
19394 | + ~EVMS_LV_INCOMPLETE; | |
19395 | + } | |
19396 | + } else { | |
19397 | + /* This should only ever happen due to | |
19398 | + * memory corruption. | |
19399 | + */ | |
19400 | + LOG_SERIOUS("Volume %s has no LE map.\n", | |
19401 | + volume->name); | |
19402 | + volume->lv_access |= EVMS_LV_INVALID; | |
19403 | + } | |
19404 | + | |
19405 | + if ( volume->lv_access & LV_SNAPSHOT_ORG ) { | |
19406 | + /* For a snapshot original, check all snapshots | |
19407 | + * in the chain, to make sure they point back to | |
19408 | + * the original. Also, make sure there is memory | |
19409 | + * for the chunk buffer. | |
19410 | + */ | |
19411 | + for ( snap = volume->snapshot_next, count = 0; | |
19412 | + snap; | |
19413 | + snap = snap->snapshot_next, count++ ) { | |
19414 | + if ( snap->snapshot_org != volume ) { | |
19415 | + LOG_SERIOUS("Snapshot volume %s not pointing at correct original\n", | |
19416 | + snap->name); /* Name the offending snapshot, not the original. */ | |
19417 | + snap->snapshot_org = NULL; | |
19418 | + snap->lv_access |= | |
19419 | + EVMS_LV_INVALID; | |
19420 | + } | |
19421 | + } | |
19422 | + if (!count) { | |
19423 | + LOG_WARNING("No snapshots found for volume %s\n", | |
19424 | + volume->name); | |
19425 | + if (final_discovery) { | |
19426 | + volume->lv_access &= | |
19427 | + ~LV_SNAPSHOT_ORG; | |
19428 | + } | |
19429 | + } else if (!volume->chunk_data_buffer) { | |
19430 | + volume->lv_access |= EVMS_LV_INVALID; | |
19431 | + } | |
19432 | + } else if ( volume->lv_access & LV_SNAPSHOT ) { | |
19433 | + /* For a snapshot volume, make sure it points | |
19434 | + * back to its original. Also make sure there is | |
19435 | + * memory for the cow table, and that any | |
19436 | + * existing snapshot entries in the snapshot map | |
19437 | + * are correctly ordered. | |
19438 | + */ | |
19439 | + /* Is there a COW table? */ | |
19440 | + if (!volume->cow_table) { | |
19441 | + LOG_SERIOUS("Snapshot volume %s has no COW table\n", | |
19442 | + volume->name); | |
19443 | + volume->lv_access |= EVMS_LV_INVALID; | |
19444 | + } | |
19445 | + /* Is the snapshot map in order? */ | |
19446 | + if ( check_snapshot_map(volume) ) { | |
19447 | + LOG_SERIOUS("Snapshot volume %s has snapshot map inconsistency\n", | |
19448 | + volume->name); | |
19449 | + volume->lv_access |= EVMS_LV_INVALID; | |
19450 | + } | |
19451 | + /* Is there an original volume? This is only | |
19452 | + * a real problem during final discovery. | |
19453 | + */ | |
19454 | + if (!volume->snapshot_org) { | |
19455 | + LOG_SERIOUS("Snapshot volume %s not pointing at an original\n", | |
19456 | + volume->name); | |
19457 | + if (final_discovery) { | |
19458 | + volume->lv_access |= | |
19459 | + EVMS_LV_INVALID; | |
19460 | + } | |
19461 | + } | |
19462 | + /* Is the original the correct one? */ | |
19463 | + else if ( volume->snap_org_minor != | |
19464 | + volume->snapshot_org->lv_minor ) { | |
19465 | + LOG_SERIOUS("Snapshot volume %s not pointing at correct original\n", | |
19466 | + volume->name); | |
19467 | + volume->lv_access |= EVMS_LV_INVALID; | |
19468 | + } | |
19469 | + } | |
19470 | + /* Delete any invalid volumes from use. Delete | |
19471 | + * incomplete volumes as well if this is not final | |
19472 | + * discovery. If a snapshot original is bad, delete all | |
19473 | + * of its snapshots. | |
19474 | + */ | |
19475 | + if ( volume->lv_access & EVMS_LV_INVALID || | |
19476 | + (!final_discovery && | |
19477 | + (volume->lv_access & EVMS_LV_INCOMPLETE) && | |
19478 | + (volume->lv_access & EVMS_LV_NEW)) ) { | |
19479 | + if ( volume->lv_access & LV_SNAPSHOT_ORG ) { | |
19480 | + for ( snap = volume->snapshot_next; | |
19481 | + snap; snap = next ) { | |
19482 | + next = snap->snapshot_next; /* Saved first: snap is deallocated below. */ | |
19483 | + snap->snapshot_next = NULL; | |
19484 | + snap->snapshot_org = NULL; | |
19485 | + invalidate_snapshot_volume(snap); | |
19486 | + deallocate_logical_volume(snap); | |
19487 | + } | |
19488 | + volume->snapshot_next = NULL; | |
19489 | + } else if ( volume->lv_access & LV_SNAPSHOT ) { | |
19490 | + invalidate_snapshot_volume(volume); | |
19491 | + } | |
19492 | + deallocate_logical_volume(volume); | |
19493 | + } | |
19494 | + } | |
19495 | + } | |
19496 | + | |
19497 | + return 0; | |
19498 | +} | |
19499 | + | |
19500 | + | |
19501 | +/********** Volume Group Discovery Functions **********/ | |
19502 | + | |
19503 | + | |
19504 | +/** | |
19505 | + * find_group_for_pv | |
19506 | + * | |
19507 | + * This is a discover-time function. It reads the VG metadata info for the | |
19508 | + * specified node, and locates the appropriate group that owns that | |
19509 | + * node. If that group does not already exist, it is created and | |
19510 | + * initialized. Returns 0 with *group set (NULL for an unassigned PV), the read_vg() error code, or -ENOMEM. A group that is found or created is always marked EVMS_VG_DIRTY. | |
19511 | + **/ | |
19512 | +static int find_group_for_pv(struct evms_logical_node * node, | |
19513 | + struct pv_disk * pv, | |
19514 | + struct lvm_volume_group ** group) | |
19515 | +{ | |
19516 | + struct vg_disk * vg; | |
19517 | + int rc; | |
19518 | + | |
19519 | + *group = NULL; | |
19520 | + | |
19521 | + /* Check for an unassigned PV (empty VG name). This is not an error. */ | |
19522 | + if ( pv->vg_name[0] == 0 ) { | |
19523 | + return 0; | |
19524 | + } | |
19525 | + | |
19526 | + /* Read the VG on-disk info for this PV. If this succeeds, it | |
19527 | + * allocates a new VG metadata structure. | |
19528 | + */ | |
19529 | + rc = read_vg(node, pv, &vg); | |
19530 | + if (rc) { | |
19531 | + return rc; | |
19532 | + } | |
19533 | + | |
19534 | + /* Use the UUID from the VG metadata to determine if this group | |
19535 | + * has already been discovered and constructed. | |
19536 | + */ | |
19537 | + find_group_by_uuid(vg->vg_uuid, group); | |
19538 | + | |
19539 | + if (!*group) { | |
19540 | + /* Create a new group entry and add to the global list. */ | |
19541 | + *group = allocate_volume_group(vg, pv->vg_name); | |
19542 | + if (!*group) { | |
19543 | + return -ENOMEM; /* NOTE(review): vg is not freed on this path - confirm allocate_volume_group() releases it on failure. */ | |
19544 | + } | |
19545 | + add_group_to_list(*group); | |
19546 | + } else if (!(*group)->vg) { | |
19547 | + /* On a rediscover, the VG metadata for an existing group might | |
19548 | + * be missing. Fill it in if necessary. This check is also not | |
19549 | + * necessary in the engine, since the metadata is never deleted. | |
19550 | + */ | |
19551 | +/* Should we re-copy vg_name? (vg_uuid can not be allowed to change). | |
19552 | + * Or should vg_name changes be done through direct ioctl only? | |
19553 | + */ | |
19554 | + (*group)->vg = vg; | |
19555 | + } else { | |
19556 | + kfree(vg); /* The group already holds VG metadata; drop the copy just read. */ | |
19557 | + } | |
19558 | + | |
19559 | + /* Read in the UUID list for this group, if it isn't present. A failure is only logged. */ | |
19560 | + rc = read_uuid_list(node, pv, *group); | |
19561 | + if (rc) { | |
19562 | + LOG_WARNING("Error reading UUID list for group %s.\n", | |
19563 | + (*group)->vg_name); | |
19564 | + LOG_WARNING("May not be able to verify PV UUIDs for group %s\n", | |
19565 | + (*group)->vg_name); | |
19566 | + } | |
19567 | + | |
19568 | + /* In the kernel, any time we even see a PV for a group, that group | |
19569 | + * must be marked dirty so its volumes will be re-exported. | |
19570 | + */ | |
19571 | + (*group)->flags |= EVMS_VG_DIRTY; | |
19572 | + | |
19573 | + return 0; | |
19574 | +} | |
19575 | + | |
19576 | +/** | |
19577 | + * check_for_duplicate_pv | |
19578 | + * | |
19579 | + * Search the list of PVs in the specified volume group. If the | |
19580 | + * specified node already exists in the list, we can discard it. Returns 1 for a duplicate (pv is then either adopted by the existing entry or freed), 0 otherwise (pv is left untouched). | |
19581 | + **/ | |
19582 | +static int check_for_duplicate_pv(struct evms_logical_node * node, | |
19583 | + struct pv_disk * pv, | |
19584 | + struct lvm_volume_group * group) | |
19585 | +{ | |
19586 | + struct lvm_physical_volume * pv_entry; | |
19587 | + | |
19588 | + /* For re-discovery, we need to search all existing PVs in this VG to | |
19589 | + * make sure we didn't get a duplicate from the plugin below us. The | |
19590 | + * plugins below us should be re-exporting the same node on | |
19591 | + * re-discovery, instead of creating a new node to represent the same | |
19592 | + * objects, so just check the memory location. | |
19593 | + */ | |
19594 | + for ( pv_entry = group->pv_list; pv_entry; pv_entry = pv_entry->next ) { | |
19595 | + if ( pv_entry->logical_node == node ) { | |
19596 | + | |
19597 | + /* We found a duplicate. Just ignore the duplicate. */ | |
19598 | + LOG_DEBUG("PV %s is already in Group %s.\n", | |
19599 | + node->name, group->vg_name); | |
19600 | + | |
19601 | + /* Even if the node was a duplicate, we may need to | |
19602 | + * fill in the pv entry for this partition, since we | |
19603 | + * always delete those at the end of discovery. | |
19604 | + */ | |
19605 | + if (!pv_entry->pv) { | |
19606 | + pv_entry->pv = pv; /* The existing entry takes ownership of pv. */ | |
19607 | + pv_entry->pv_number = pv->pv_number; | |
19608 | + } else { | |
19609 | + kfree(pv); /* The entry already has metadata; the new copy is redundant. */ | |
19610 | + } | |
19611 | + | |
19612 | + return 1; | |
19613 | + } | |
19614 | + } | |
19615 | + | |
19616 | + /* No duplicate was found. */ | |
19617 | + return 0; | |
19618 | +} | |
19619 | + | |
19620 | +/** | |
19621 | + * verify_pv_uuid | |
19622 | + * | |
19623 | + * Verify that the specified PV belongs in the specified group by | |
19624 | + * searching for the PV's UUID in the group's list. Returns 0 if the UUID is found (or if the list is missing and nothing can be checked), -EINVAL otherwise. | |
19625 | + **/ | |
19626 | +static int verify_pv_uuid(struct lvm_physical_volume * pv_entry, | |
19627 | + struct lvm_volume_group * group) | |
19628 | +{ | |
19629 | + int i; | |
19630 | + | |
19631 | + /* Obviously the UUID list must be present in order to search. */ | |
19632 | + if (!group->uuid_list) { | |
19633 | + LOG_WARNING("UUID list is missing from group %s.\n", | |
19634 | + group->vg_name); | |
19635 | + LOG_WARNING("Cannot verify UUID for PV %s\n", | |
19636 | + pv_entry->logical_node->name); | |
19637 | + return 0; | |
19638 | + } | |
19639 | + | |
19640 | + /* Start with the UUID entry for this PV's number. PV number 0 has no slot (the index would fall before the list), so it goes straight to the full search. */ | |
19641 | + if ( pv_entry->pv_number && | |
19642 | + ! memcmp(pv_entry->pv->pv_uuid, &(group->uuid_list[(pv_entry->pv_number - 1) * NAME_LEN]), | |
19643 | + UUID_LEN) ) { | |
19644 | + return 0; | |
19645 | + } | |
19646 | + | |
19647 | + /* If it wasn't found there, then search the entire group's list. */ | |
19648 | + for ( i = 0; i < group->vg->pv_cur; i++ ) { | |
19649 | + if ( ! memcmp(pv_entry->pv->pv_uuid, | |
19650 | + &(group->uuid_list[i * NAME_LEN]), UUID_LEN) ) { | |
19651 | + /* Found the UUID. */ | |
19652 | + LOG_WARNING("Detected UUID mismatch for PV %s!\n", | |
19653 | + pv_entry->logical_node->name); | |
19654 | + LOG_WARNING("PV %s is recorded as being at index %d,\n", | |
19655 | + pv_entry->logical_node->name, | |
19656 | + pv_entry->pv_number); | |
19657 | + LOG_WARNING(" but Group %s has it recorded at index %d.\n", | |
19658 | + group->vg_name, i + 1); | |
19659 | + LOG_WARNING("Run the EVMS Engine to correct the problem.\n"); | |
19660 | + LOG_WARNING("If you have any snapshot regions in group %s\n", | |
19661 | + group->vg_name); | |
19662 | + LOG_WARNING(" it is recommended that you delete them immediately!\n"); | |
19663 | + return 0; | |
19664 | + } | |
19665 | + } | |
19666 | + | |
19667 | + LOG_SERIOUS("Could not find UUID for PV %s in group %s\n", | |
19668 | + pv_entry->logical_node->name, group->vg_name); | |
19669 | + return -EINVAL; | |
19670 | +} | |
19671 | + | |
19672 | +/** | |
19673 | + * add_pv_to_group | |
19674 | + * | |
19675 | + * Adds the physical volume to the appropriate volume group. The PV | |
19676 | + * passed into this function MUST be part of a valid VG. Returns 0 on success, or the verify_pv_uuid() error, in which case the group is left unchanged. | |
19677 | + **/ | |
19678 | +static int add_pv_to_group(struct lvm_physical_volume * pv_entry, | |
19679 | + struct lvm_volume_group * group) | |
19680 | +{ | |
19681 | + int rc; | |
19682 | + | |
19683 | + /* Make sure this PV's UUID is listed in the group. */ | |
19684 | + rc = verify_pv_uuid(pv_entry, group); | |
19685 | + if (rc) { | |
19686 | + LOG_SERIOUS("PV %s does not belong in group %s!\n", | |
19687 | + pv_entry->logical_node->name, group->vg_name); | |
19688 | + return rc; | |
19689 | + } | |
19690 | + | |
19691 | + /* Add this PV to the beginning of its group's list. */ | |
19692 | + pv_entry->next = group->pv_list; | |
19693 | + group->pv_list = pv_entry; | |
19694 | + group->pv_count++; | |
19695 | + | |
19696 | + /* Update the group's block and hardsector sizes as appropriate: each keeps the largest value seen across the group's PVs. */ | |
19697 | + group->block_size = max(pv_entry->logical_node->block_size, | |
19698 | + group->block_size); | |
19699 | + group->hard_sect_size = max(pv_entry->logical_node->hardsector_size, | |
19700 | + group->hard_sect_size); | |
19701 | + | |
19702 | + /* Check for the Partial or Removable flag on the PV. Either one is recorded on the group and is never cleared here. */ | |
19703 | + if ( pv_entry->logical_node->flags & EVMS_VOLUME_PARTIAL ) { | |
19704 | + group->flags |= EVMS_VG_PARTIAL_PVS; | |
19705 | + } | |
19706 | + if ( pv_entry->logical_node->flags & EVMS_DEVICE_REMOVABLE ) { | |
19707 | + group->flags |= EVMS_VG_REMOVABLE_PVS; | |
19708 | + } | |
19709 | + | |
19710 | + LOG_DETAILS("PV %s added to Group %s\n", | |
19711 | + pv_entry->logical_node->name, group->vg_name); | |
19712 | + | |
19713 | + return 0; | |
19714 | +} | |
19715 | + | |
19716 | +/** | |
19717 | + * discover_volume_groups | |
19718 | + * | |
19719 | + * Examine the list of logical nodes. Any node that contains a valid PV | |
19720 | + * structure is consumed and added to the appropriate volume group. For PVs | |
19721 | + * which do not belong to any group, only the PV metadata read here is freed; the node, like everything else, is left | |
19722 | + * on the discovery list. Always returns 0. | |
19723 | + **/ | |
19724 | +static int discover_volume_groups(struct evms_logical_node ** evms_node_list) | |
19725 | +{ | |
19726 | + struct evms_logical_node * node, * next_node; | |
19727 | + struct pv_disk * pv; | |
19728 | + struct lvm_volume_group * group; | |
19729 | + struct lvm_physical_volume * pv_entry; | |
19730 | + int rc; | |
19731 | + | |
19732 | + LOG_EXTRA("Searching for PVs in the node list.\n"); | |
19733 | + | |
19734 | + /* Run through the discovery list. */ | |
19735 | + for ( node = *evms_node_list; node; node = next_node ) { | |
19736 | + /* Save the next node. We may remove this one from the list. */ | |
19737 | + next_node = node->next; | |
19738 | + | |
19739 | + /* Read the PV metadata. This will also create a new struct pv_disk | |
19740 | + * if it finds the correct LVM signatures. | |
19741 | + */ | |
19742 | + rc = read_pv(node, &pv); | |
19743 | + if (rc) { | |
19744 | + /* This node is not an LVM PV, or an error occurred. | |
19745 | + * Just leave the node on the discovery list. | |
19746 | + */ | |
19747 | + continue; | |
19748 | + } | |
19749 | + | |
19750 | + rc = find_group_for_pv(node, pv, &group); | |
19751 | + if (rc) { | |
19752 | + /* Error getting the group for this PV. */ | |
19753 | + kfree(pv); | |
19754 | + continue; | |
19755 | + } | |
19756 | + | |
19757 | + if (!group) { | |
19758 | + /* This node is an unassigned PV. The node stays on the discovery list. */ | |
19759 | + LOG_DETAILS("PV %s is unassigned.\n", node->name); | |
19760 | + kfree(pv); | |
19761 | + continue; | |
19762 | + } | |
19763 | + | |
19764 | + rc = check_for_duplicate_pv(node, pv, group); | |
19765 | + if (rc) { | |
19766 | + /* This node is already in the group. This check is also | |
19767 | + * only in the kernel because the engine has no notion | |
19768 | + * of rediscover, and thus can never get a duplicate. | |
19769 | + */ | |
19770 | + evms_cs_remove_logical_node_from_list(evms_node_list, | |
19771 | + node); | |
19772 | + continue; | |
19773 | + } | |
19774 | + | |
19775 | + /* Allocate a PV entry for this node. */ | |
19776 | + pv_entry = allocate_physical_volume(node, pv); | |
19777 | + if (!pv_entry) { | |
19778 | + continue; /* NOTE(review): pv is not freed on this path - confirm allocate_physical_volume() releases it on failure. */ | |
19779 | + } | |
19780 | + | |
19781 | + /* Add this PV to the appropriate volume group. */ | |
19782 | + rc = add_pv_to_group(pv_entry, group); | |
19783 | + if (rc) { | |
19784 | + deallocate_physical_volume(pv_entry); /* Rejected PV: the node is left on the discovery list. */ | |
19785 | + continue; | |
19786 | + } | |
19787 | + | |
19788 | + rc = read_pe_map(pv_entry); /* A failure is only logged; the PV stays in the group. */ | |
19789 | + if (rc) { | |
19790 | + LOG_WARNING("Error reading PE maps for node %s\n", | |
19791 | + node->name); | |
19792 | + LOG_WARNING("Any volumes residing on this node will be incomplete!\n"); | |
19793 | + } | |
19794 | + | |
19795 | + evms_cs_remove_logical_node_from_list(evms_node_list, node); | |
19796 | + } | |
19797 | + | |
19798 | + LOG_EXTRA("Group discovery complete.\n"); | |
19799 | + return 0; | |
19800 | +} | |
19801 | + | |
19802 | + | |
19803 | +/********** Logical Volume Discovery Functions **********/ | |
19804 | + | |
19805 | + | |
19806 | +/** | |
19807 | + * build_le_maps | |
19808 | + * | |
19809 | + * After all logical volumes have been discovered, the mappings from | |
19810 | + * logical extents to physical extents must be constructed. Each PV | |
19811 | + * contains a map on-disk of its PEs. Each PE map entry contains the | |
19812 | + * logical volume number and the logical extent number on that volume. | |
19813 | + * Our internal map is the reverse of this map for each volume, listing | |
19814 | + * the PV node and sector offset for every logical extent on the volume. PVs without a PE map, and PE entries naming an out-of-range LV or LE, are skipped. Always returns 0. | |
19815 | + **/ | |
19816 | +static int build_le_maps(struct lvm_volume_group * group) | |
19817 | +{ | |
19818 | + struct lvm_logical_volume ** volume_list = group->volume_list; | |
19819 | + struct lvm_physical_volume * pv_entry; | |
19820 | + struct evms_logical_node * node; | |
19821 | + struct pv_disk * pv; | |
19822 | + struct pe_disk * pe_map; | |
19823 | + u64 offset; | |
19824 | + u32 lv_number, le_number, first_pe_sector; | |
19825 | + int i; | |
19826 | + | |
19827 | + LOG_DEBUG("Building LE maps for new volumes in group %s.\n", | |
19828 | + group->vg_name); | |
19829 | + | |
19830 | + /* For every PV in this VG. */ | |
19831 | + for ( pv_entry = group->pv_list; pv_entry; pv_entry = pv_entry->next ) { | |
19832 | + node = pv_entry->logical_node; | |
19833 | + pv = pv_entry->pv; | |
19834 | + pe_map = pv_entry->pe_map; | |
19835 | + | |
19836 | + /* Version 1 metadata uses pe_on_disk.base + .size to find start | |
19837 | + * of first PE. Version 2 uses pe_start. | |
19838 | + */ | |
19839 | + if (pv->version == 1) { | |
19840 | + first_pe_sector = | |
19841 | + evms_cs_size_in_vsectors(pv->pe_on_disk.base + | |
19842 | + pv->pe_on_disk.size); | |
19843 | + } else { | |
19844 | + first_pe_sector = pv->pe_start; | |
19845 | + if (!first_pe_sector) { | |
19846 | + first_pe_sector = | |
19847 | + evms_cs_size_in_vsectors(pv->pe_on_disk.base + | |
19848 | + pv->pe_on_disk.size); | |
19849 | + } | |
19850 | + } | |
19851 | + | |
19852 | + /* For every entry in the PE map, calculate the PE's sector offset | |
19853 | + * and update the correct LV's PE map. LV number of 0 marks an unused PE. | |
19854 | + * For re-discovery, only compute entries for new volumes. If a PV | |
19855 | + * is read-only, all LVs on that PV will also be read-only. The PE map may be missing (a failed read is only warned about), and its LV/LE numbers come from disk, so both are checked before use. | |
19856 | + */ | |
19857 | + for ( i = 0; pe_map && i < pv->pe_total; i++ ) { | |
19858 | + lv_number = pe_map[i].lv_num; | |
19859 | + if ( lv_number && lv_number <= MAX_LV && | |
19860 | + volume_list[lv_number] && volume_list[lv_number]->le_map && | |
19861 | + pe_map[i].le_num < volume_list[lv_number]->num_le && | |
19862 | + volume_list[lv_number]->lv_access & (EVMS_LV_NEW | EVMS_LV_INCOMPLETE) ) { | |
19863 | + le_number = pe_map[i].le_num; | |
19864 | + offset = (u64)i * pv->pe_size + first_pe_sector; /* Widen before multiplying so the product cannot wrap at 32 bits. */ | |
19865 | + volume_list[lv_number]->le_map[le_number].owning_pv = | |
19866 | + pv_entry; | |
19867 | + volume_list[lv_number]->le_map[le_number].pe_sector_offset = | |
19868 | + offset; | |
19869 | + if ( node->flags & EVMS_VOLUME_SET_READ_ONLY ) { | |
19870 | + volume_list[lv_number]->lv_access &= | |
19871 | + ~LV_WRITE; | |
19872 | + } | |
19873 | + } | |
19874 | + } | |
19875 | + } | |
19876 | + | |
19877 | + return 0; | |
19878 | +} | |
19879 | + | |
19880 | +/** | |
19881 | + * build_snapshot_maps | |
19882 | + * | |
19883 | + * For every volume in this group that is a snapshot, read all of the | |
19884 | + * existing entries in the COW table, and build up the snapshot mapping | |
19885 | + * structures accordingly. A snapshot whose COW table cannot be read, or which holds an invalid entry, is invalidated and deallocated; the remaining volumes are still processed. Always returns 0. | |
19886 | + * | |
19887 | + * For reference, the COW tables attached to the snapshot volumes will | |
19888 | + * always be in disk-order (little-endian), so that it can always be | |
19889 | + * immediately written to disk. Therefore, endian conversions are necessary | |
19890 | + * any time the COW table is accessed. This function will make a local | |
19891 | + * copy of each COW table sector, and convert the local copy before | |
19892 | + * building the snapshot maps. | |
19893 | + **/ | |
19894 | +static int build_snapshot_maps(struct lvm_volume_group * group) | |
19895 | +{ | |
19896 | + struct lvm_logical_volume * volume; | |
19897 | + struct evms_logical_node tmp_node; | |
19898 | + struct lv_COW_table_disk cow_table[EVMS_VSECTOR_SIZE / | |
19899 | + sizeof(struct lv_COW_table_disk)]; | |
19900 | + unsigned long max_entries = EVMS_VSECTOR_SIZE / | |
19901 | + sizeof(struct lv_COW_table_disk); | |
19902 | + int i, j; | |
19903 | + | |
19904 | + /* Check every volume in the group to see if it is a snapshot. Also | |
19905 | + * check to make sure it is a new volume in the case of re-discovery. | |
19906 | + */ | |
19907 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
19908 | + | |
19909 | + /* The volume must exist, must be new, and must be a snapshot. | |
19910 | + */ | |
19911 | + volume = group->volume_list[i]; | |
19912 | + if ( !volume || | |
19913 | + !(volume->lv_access & EVMS_LV_NEW) || | |
19914 | + !(volume->lv_access & LV_SNAPSHOT)) { | |
19915 | + continue; | |
19916 | + } | |
19917 | + | |
19918 | + /* Set up a temporary EVMS node. NOTE(review): only .private is initialised; assumes lvm_init_io() reads nothing else from the node - confirm. */ | |
19919 | + tmp_node.private = volume; | |
19920 | + | |
19921 | + LOG_DEBUG("Building snapshot map for volume %s\n", | |
19922 | + volume->name); | |
19923 | + | |
19924 | + while (1) { | |
19925 | + /* Read in one sector's worth of COW tables. */ | |
19926 | + if ( lvm_init_io(&tmp_node, 0, | |
19927 | + volume->current_cow_sector, | |
19928 | + 1, volume->cow_table) ) { | |
19929 | + goto error; | |
19930 | + } | |
19931 | + | |
19932 | + /* Endian-conversion of this COW table | |
19933 | + * to a local table. | |
19934 | + */ | |
19935 | + for ( j = 0; j < max_entries; j++ ) { | |
19936 | + cow_table[j].pv_org_number = | |
19937 | + le64_to_cpu(volume->cow_table[j].pv_org_number); | |
19938 | + cow_table[j].pv_org_rsector = | |
19939 | + le64_to_cpu(volume->cow_table[j].pv_org_rsector); | |
19940 | + cow_table[j].pv_snap_number = | |
19941 | + le64_to_cpu(volume->cow_table[j].pv_snap_number); | |
19942 | + cow_table[j].pv_snap_rsector = | |
19943 | + le64_to_cpu(volume->cow_table[j].pv_snap_rsector); | |
19944 | + } | |
19945 | + | |
19946 | + /* Translate every valid COW table entry into | |
19947 | + * a snapshot map entry. | |
19948 | + */ | |
19949 | + for ( volume->next_cow_entry = 0; | |
19950 | + volume->next_cow_entry < max_entries && | |
19951 | + cow_table[volume->next_cow_entry].pv_org_number; | |
19952 | + volume->next_cow_entry++ ) { | |
19953 | + /* org_rsector must be a valid sector number, | |
19954 | + * i.e. it can't be within a PVs metadata. This | |
19955 | + * is how we detect invalidated snapshots. | |
19956 | + */ | |
19957 | + if ( cow_table[volume->next_cow_entry].pv_org_rsector < 10 || | |
19958 | + cow_table[volume->next_cow_entry].pv_org_number > group->pv_count || | |
19959 | + add_cow_entry_to_snapshot_map(&(cow_table[volume->next_cow_entry]), volume) ) { | |
19960 | + /* This volume either has an invalid COW entry, | |
19961 | + * or had an error adding that COW entry to the | |
19962 | + * snapshot map. This snapshot is done. | |
19963 | + */ | |
19964 | + goto error; | |
19965 | + } | |
19966 | + volume->next_free_chunk += volume->chunk_size; | |
19967 | + } | |
19968 | + | |
19969 | + /* Move on to the next sector if necessary. */ | |
19970 | + if ( volume->next_cow_entry == max_entries ) { | |
19971 | + volume->current_cow_sector++; | |
19972 | + } else { | |
19973 | + break; | |
19974 | + } | |
19975 | + } | |
19976 | + continue; | |
19977 | + | |
19978 | +error: | |
19979 | + /* Only this snapshot is bad: drop it, then carry on with the rest of the group. */ | |
19980 | + invalidate_snapshot_volume(volume); | |
19981 | + deallocate_logical_volume(volume); | |
19982 | + } | |
19983 | + return 0; | |
19984 | +} | |
19985 | + | |
19986 | +/** | |
19987 | + * link_snapshot_volumes | |
19988 | + * | |
19989 | + * This function examines the list of logical volumes in this group and | |
19990 | + * sets up the necessary pointers to link snapshots and their originals. | |
19991 | + * A singly-linked list is created starting with the original volume. Also, | |
19992 | + * all snapshot volumes point directly back to their original. This | |
19993 | + * function should not be run until all volumes have been discovered. | |
19994 | + * In the case of re-discovery, all of these links/lists get rebuilt as if | |
19995 | + * they were not already there. Currently this should not pose a problem. | |
19996 | + **/ | |
19997 | +static int link_snapshot_volumes(struct lvm_volume_group * group) | |
19998 | +{ | |
19999 | + struct lvm_logical_volume * org_volume, * snap_volume; | |
20000 | + u32 org_minor, buffer_size = 0; | |
20001 | + int i, j; | |
20002 | + | |
20003 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
20004 | + | |
20005 | + /* Only process snapshot-originals. */ | |
20006 | + org_volume = group->volume_list[i]; | |
20007 | + if ( !org_volume || !(org_volume->lv_access & LV_SNAPSHOT_ORG) ) { | |
20008 | + continue; | |
20009 | + } | |
20010 | + | |
20011 | + /* For snapshot-originals, look for all other volumes that | |
20012 | + * claim to be snapshotting it. For each one that is found, | |
20013 | + * insert it at the start of the original's list of snapshots. | |
20014 | + * Need to start with a NULL snapshot_next, otherwise could | |
20015 | + * wind up with circular lists. | |
20016 | + */ | |
20017 | + org_minor = org_volume->lv_minor; | |
20018 | + org_volume->snapshot_next = NULL; | |
20019 | + | |
20020 | + for ( j = 1; j <= MAX_LV; j++ ) { | |
20021 | + snap_volume = group->volume_list[j]; | |
20022 | + if ( snap_volume && | |
20023 | + snap_volume->lv_access & LV_SNAPSHOT && | |
20024 | + (snap_volume->snap_org_minor == org_minor) ) { | |
20025 | + snap_volume->snapshot_org = org_volume; | |
20026 | + snap_volume->snapshot_next = | |
20027 | + org_volume->snapshot_next; | |
20028 | + org_volume->snapshot_next = snap_volume; | |
20029 | + if ( snap_volume->chunk_size > buffer_size ) { | |
20030 | + buffer_size = snap_volume->chunk_size; | |
20031 | + } | |
20032 | + LOG_DEBUG("Linking snapshot (%s) to original (%s)\n", | |
20033 | + snap_volume->name, org_volume->name); | |
20034 | + } | |
20035 | + } | |
20036 | + | |
20037 | + /* If no snapshots were found for a volume that claims to be | |
20038 | + * under snapshot, mark the group dirty. If this is final | |
20039 | + * discovery, the original will have the snapshot flag turned | |
20040 | + * off in check_logical_volumes(). | |
20041 | + */ | |
20042 | + if (!org_volume->snapshot_next) { | |
20043 | + LOG_WARNING("No snapshots found for original (%s)\n", | |
20044 | + org_volume->name); | |
20045 | + group->flags |= EVMS_VG_DIRTY; | |
20046 | + } | |
20047 | + } | |
20048 | + return 0; | |
20049 | +} | |
20050 | + | |
20051 | +/** | |
20052 | + * discover_volumes_in_group | |
20053 | + **/ | |
20054 | +static int discover_volumes_in_group(struct lvm_volume_group * group) | |
20055 | +{ | |
20056 | + struct lv_disk * lv_array = group->lv_array; | |
20057 | + struct lvm_logical_volume * new_volume; | |
20058 | + int i; | |
20059 | + | |
20060 | + /* Search through the LV structs for valid LV entries. */ | |
20061 | + for ( i = 0; i < group->vg->lv_max; i++ ) { | |
20062 | + | |
20063 | + /* Only discover valid, active volumes. */ | |
20064 | + if ( !lv_array[i].lv_name[0] || | |
20065 | + lv_array[i].lv_number >= MAX_LV ) { | |
20066 | + continue; | |
20067 | + } | |
20068 | + | |
20069 | + /* Make sure this volume isn't already in the list. */ | |
20070 | + if (group->volume_list[lv_array[i].lv_number + 1]) { | |
20071 | + continue; | |
20072 | + } | |
20073 | + | |
20074 | + /* Create a new logical volume and place it in the appropriate | |
20075 | + * spot in this VG's volume list. | |
20076 | + */ | |
20077 | + new_volume = allocate_logical_volume(&(lv_array[i]), group); | |
20078 | + if (!new_volume) { | |
20079 | + /* This volume will be missing, but other | |
20080 | + * volumes in this group can still be built. | |
20081 | + */ | |
20082 | + LOG_CRITICAL("Error allocating LV %s in Group %s\n", | |
20083 | + lv_array[i].lv_name, group->vg_name); | |
20084 | + continue; | |
20085 | + } | |
20086 | + | |
20087 | + group->volume_list[new_volume->lv_number] = new_volume; | |
20088 | + group->volume_count++; | |
20089 | + group->flags |= EVMS_VG_DIRTY; | |
20090 | + | |
20091 | + LOG_DEBUG("Discovered volume %s in group %s.\n", | |
20092 | + new_volume->name, group->vg_name); | |
20093 | + } | |
20094 | + | |
20095 | + return 0; | |
20096 | +} | |
20097 | + | |
20098 | +/** | |
20099 | + * discover_logical_volumes | |
20100 | + * | |
20101 | + * After all PVs have been claimed and added to the appropriate VG list, | |
20102 | + * the volumes for each VG must be constructed. For each group, read all | |
20103 | + * the LV structs off the first PV in the list. Search this list of | |
20104 | + * structs for valid LVs. For each valid LV, create a new volume and add | |
20105 | + * it to the group. | |
20106 | + **/ | |
20107 | +static int discover_logical_volumes(int final_discovery) | |
20108 | +{ | |
20109 | + struct lvm_volume_group *group; | |
20110 | + int rc; | |
20111 | + | |
20112 | + /* Look for volumes in each valid VG entry. We even need to check ones | |
20113 | + * that aren't dirty - We could have deleted an incomplete volume on | |
20114 | + * the previous pass, and need to rediscover it in case this is final | |
20115 | + * discovery and we now want to export it. | |
20116 | + */ | |
20117 | + for ( group = lvm_group_list; group; group = group->next_group ) { | |
20118 | + | |
20119 | + if ( ! group->vg || | |
20120 | + (! final_discovery && | |
20121 | + ! (group->flags & EVMS_VG_DIRTY)) ) { | |
20122 | + continue; | |
20123 | + } | |
20124 | + | |
20125 | + LOG_DEBUG("Searching for volumes in group %s\n", | |
20126 | + group->vg_name); | |
20127 | + | |
20128 | + /* Read in the LV array from disk if necessary. */ | |
20129 | + rc = read_lv(group); | |
20130 | + if (rc) { | |
20131 | + LOG_WARNING("Unable to read LV metadata for group %s\n", | |
20132 | + group->vg_name); | |
20133 | + LOG_WARNING("No regions can be discovered for group %s\n", | |
20134 | + group->vg_name); | |
20135 | + continue; | |
20136 | + } | |
20137 | + | |
20138 | + /* Assemble each volume in the group. */ | |
20139 | + discover_volumes_in_group(group); | |
20140 | + | |
20141 | + /* Build the LE map for each LV discovered in this group. This | |
20142 | + * must be done after all LVS in the group are discovered. | |
20143 | + */ | |
20144 | + build_le_maps(group); | |
20145 | + check_le_maps(group); | |
20146 | + | |
20147 | + /* Set up all of the initial snapshot maps. Only the kernel | |
20148 | + * keeps track of the snapshot maps. | |
20149 | + */ | |
20150 | + build_snapshot_maps(group); | |
20151 | + | |
20152 | + /* Set up the pointers to link snapshot volumes | |
20153 | + * with their originals. | |
20154 | + */ | |
20155 | + link_snapshot_volumes(group); | |
20156 | + } | |
20157 | + | |
20158 | + return 0; | |
20159 | +} | |
20160 | + | |
20161 | +/** | |
20162 | + * export_volumes | |
20163 | + * | |
20164 | + * The last thing the plugin must do is take each newly constructed volume | |
20165 | + * and place it on the evms logical node list. A zero return-code from | |
20166 | + * this function means nothing new was added to the list, and a positive | |
20167 | + * return code means that many new items were added to the list. | |
20168 | + **/ | |
20169 | +static int export_volumes(struct evms_logical_node ** evms_node_list, | |
20170 | + int final_discover) | |
20171 | +{ | |
20172 | + struct lvm_volume_group * group; | |
20173 | + struct evms_logical_node * new_node; | |
20174 | + struct lvm_logical_volume * volume; | |
20175 | + int i, count = 0; | |
20176 | + | |
20177 | + LOG_EXTRA("Exporting volumes\n"); | |
20178 | + | |
20179 | + /* For every valid, dirty volume group. */ | |
20180 | + for ( group = lvm_group_list; group; group = group->next_group ) { | |
20181 | + if ( ! (group->flags & EVMS_VG_DIRTY) ) { | |
20182 | + continue; | |
20183 | + } | |
20184 | + | |
20185 | + /* Export every valid volume in the group. For re-discovery, | |
20186 | + * we re-export the same logical node. | |
20187 | + */ | |
20188 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
20189 | + volume = group->volume_list[i]; | |
20190 | + if (!volume) { | |
20191 | + continue; | |
20192 | + } | |
20193 | + | |
20194 | + /* For new volumes, create a new EVMS node and | |
20195 | + * initialize the appropriate fields. | |
20196 | + */ | |
20197 | + if ( volume->lv_access & EVMS_LV_NEW ) { | |
20198 | + if ( evms_cs_allocate_logical_node(&new_node) ) { | |
20199 | + continue; | |
20200 | + } | |
20201 | + MOD_INC_USE_COUNT; | |
20202 | + | |
20203 | + volume->volume_node = new_node; | |
20204 | + volume->lv_access &= (~EVMS_LV_QUIESCED & | |
20205 | + ~EVMS_LV_NEW); | |
20206 | + new_node->hardsector_size = | |
20207 | + group->hard_sect_size; | |
20208 | + new_node->block_size = group->block_size; | |
20209 | + new_node->plugin = &lvm_plugin_header; | |
20210 | + new_node->private = volume; | |
20211 | + memcpy(new_node->name, volume->name, NAME_LEN); | |
20212 | + | |
20213 | + /* Snapshot volumes should report the | |
20214 | + * size of their original. | |
20215 | + */ | |
20216 | + new_node->total_vsectors = | |
20217 | + (volume->lv_access & LV_SNAPSHOT) ? | |
20218 | + volume->snapshot_org->lv_size : | |
20219 | + volume->lv_size; | |
20220 | + | |
20221 | + /* Is the volume read-only? */ | |
20222 | + if ( ! (volume->lv_access & LV_WRITE) ) { | |
20223 | + new_node->flags |= | |
20224 | + EVMS_VOLUME_READ_ONLY; | |
20225 | + LOG_DEBUG("LVM volume %s is read-only\n", | |
20226 | + volume->name); | |
20227 | + } | |
20228 | + | |
20229 | + /* Is the volume incomplete? */ | |
20230 | + if ( volume->lv_access & EVMS_LV_INCOMPLETE ) { | |
20231 | + new_node->flags |= | |
20232 | + (EVMS_VOLUME_READ_ONLY | | |
20233 | + EVMS_VOLUME_PARTIAL); | |
20234 | + LOG_DEBUG("LVM volume %s is incomplete\n", | |
20235 | + volume->name); | |
20236 | + } | |
20237 | + | |
20238 | + /* Does the volume group contain any partial or | |
20239 | + * removable PVs? | |
20240 | + */ | |
20241 | + if ( group->flags & EVMS_VG_PARTIAL_PVS ) { | |
20242 | + new_node->flags |= EVMS_VOLUME_PARTIAL; | |
20243 | + } | |
20244 | + if ( group->flags & EVMS_VG_REMOVABLE_PVS ) { | |
20245 | + new_node->flags |= | |
20246 | + EVMS_DEVICE_REMOVABLE; | |
20247 | + } | |
20248 | + } | |
20249 | + | |
20250 | + /* Export the node, only if it hasn't been exported | |
20251 | + * during this full EVMS discover. | |
20252 | + */ | |
20253 | + if ( ! (volume->lv_access & EVMS_LV_EXPORTED) ) { | |
20254 | + if ( ! evms_cs_add_logical_node_to_list(evms_node_list, | |
20255 | + volume->volume_node) ) { | |
20256 | + LOG_DETAILS("Exporting LVM volume %s\n", | |
20257 | + volume->name); | |
20258 | + volume->lv_access |= EVMS_LV_EXPORTED; | |
20259 | + count++; | |
20260 | + } | |
20261 | + } | |
20262 | + | |
20263 | + if (final_discover) { | |
20264 | + volume->lv_access &= ~EVMS_LV_EXPORTED; | |
20265 | + } | |
20266 | + } | |
20267 | + | |
20268 | + /* The group is clean now. */ | |
20269 | + group->flags &= ~EVMS_VG_DIRTY; | |
20270 | + } | |
20271 | + | |
20272 | + return count; | |
20273 | +} | |
20274 | + | |
20275 | +/** | |
20276 | + * lvm_cleanup | |
20277 | + * | |
20278 | + * This function runs through the entire lvm data structure, removing | |
20279 | + * all items that are not needed at runtime. Currently, this is just the | |
20280 | + * struct vg_disk structure and the struct pv_disk structure for each PV. | |
20281 | + * Also, any groups that don't contain any volumes are deleted. All of the | |
20282 | + * other volume_group, logical_volume and evms_logical_node structures will | |
20283 | + * be kept around at run-time. | |
20284 | + **/ | |
20285 | +static int lvm_cleanup(void) | |
20286 | +{ | |
20287 | + struct lvm_volume_group * group, * next_group; | |
20288 | + struct lvm_physical_volume * pv_entry; | |
20289 | + | |
20290 | + for ( group = lvm_group_list; group; group = next_group ) { | |
20291 | + next_group = group->next_group; | |
20292 | + | |
20293 | + /* Delete groups with no volumes. */ | |
20294 | + if (!group->volume_count) { | |
20295 | + LOG_WARNING("Group %s contains no logical volumes. Deleting.\n", | |
20296 | + group->vg_name); | |
20297 | + remove_group_from_list(group); | |
20298 | + deallocate_volume_group(group); | |
20299 | + /* Need to go back to the start of the list, | |
20300 | + * just to be safe. :) | |
20301 | + */ | |
20302 | + next_group = lvm_group_list; | |
20303 | + continue; | |
20304 | + } | |
20305 | + | |
20306 | + /* Delete data structures that aren't used at runtime. */ | |
20307 | + if (group->vg) { | |
20308 | + kfree(group->vg); | |
20309 | + group->vg = NULL; | |
20310 | + } | |
20311 | + | |
20312 | + for ( pv_entry = group->pv_list; | |
20313 | + pv_entry; pv_entry = pv_entry->next) { | |
20314 | + if (pv_entry->pv) { | |
20315 | + kfree(pv_entry->pv); | |
20316 | + pv_entry->pv = NULL; | |
20317 | + } | |
20318 | + if (pv_entry->pe_map) { | |
20319 | + vfree(pv_entry->pe_map); | |
20320 | + pv_entry->pe_map = NULL; | |
20321 | + } | |
20322 | + } | |
20323 | + if (group->lv_array) { | |
20324 | + vfree(group->lv_array); | |
20325 | + group->lv_array = NULL; | |
20326 | + } | |
20327 | + if (group->uuid_list) { | |
20328 | + vfree(group->uuid_list); | |
20329 | + group->uuid_list = NULL; | |
20330 | + } | |
20331 | + } | |
20332 | + return 0; | |
20333 | +} | |
20334 | + | |
20335 | +/** | |
20336 | + * lvm_get_bmap | |
20337 | + * | |
20338 | + * Support for the BMAP ioctl used by LILO to translate filesystem blocks | |
20339 | + * to disk blocks to map kernel images for boot time. | |
20340 | + **/ | |
20341 | +static int lvm_get_bmap(struct evms_logical_node * node, | |
20342 | + struct evms_get_bmap_pkt * bmap, | |
20343 | + struct evms_logical_node ** pv_node) | |
20344 | +{ | |
20345 | + struct lvm_logical_volume * volume = node->private; | |
20346 | + struct lvm_physical_volume * pv_entry; | |
20347 | + u64 pe_start_sector, new_sector = 0, new_size = 0; | |
20348 | + int rc = 0; | |
20349 | + | |
20350 | + /* No kernel images allowed on snapshot LVs. */ | |
20351 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
20352 | + return -EINVAL; | |
20353 | + } | |
20354 | + | |
20355 | + /* Range check. */ | |
20356 | + if ( bmap->rsector >= volume->lv_size ) { | |
20357 | + return -EINVAL; | |
20358 | + } | |
20359 | + | |
20360 | + rc = remap_sector(node, bmap->rsector, 1, &new_sector, | |
20361 | + &new_size, &pe_start_sector, &pv_entry); | |
20362 | + | |
20363 | + if (rc || !pv_entry || !new_sector) { | |
20364 | + return -EINVAL; | |
20365 | + } | |
20366 | + | |
20367 | + bmap->rsector = new_sector; | |
20368 | + *pv_node = pv_entry->logical_node; | |
20369 | + | |
20370 | + return 0; | |
20371 | +} | |
20372 | + | |
20373 | +/** | |
20374 | + * lvm_global_proc_read | |
20375 | + * | |
20376 | + * A callback function for the lvm-global proc-fs entry. This will print | |
20377 | + * general info about all LVM VGs, PVs, and LVs. | |
20378 | + **/ | |
20379 | +static int lvm_global_proc_read(char * page, char ** start, off_t off, | |
20380 | + int count, int * eof, void * data) | |
20381 | +{ | |
20382 | + struct lvm_volume_group * group; | |
20383 | + struct lvm_physical_volume * pv_entry; | |
20384 | + struct lvm_logical_volume * volume, * snap; | |
20385 | + int vgs = 0, lvs = 0, pvs = 0; | |
20386 | + int i, sz = 0; | |
20387 | + | |
20388 | + PROCPRINT("Enterprise Volume Management System: LVM Plugin\n"); | |
20389 | + PROCPRINT("Plugin ID: %x.%x.%x\n", | |
20390 | + GetPluginOEM(lvm_plugin_header.id), | |
20391 | + GetPluginType(lvm_plugin_header.id), | |
20392 | + GetPluginID(lvm_plugin_header.id)); | |
20393 | + PROCPRINT("Plugin Version: %d.%d.%d\n", | |
20394 | + lvm_plugin_header.version.major, | |
20395 | + lvm_plugin_header.version.minor, | |
20396 | + lvm_plugin_header.version.patchlevel); | |
20397 | + PROCPRINT("Required EVMS Services Version: %d.%d.%d\n", | |
20398 | + lvm_plugin_header.required_services_version.major, | |
20399 | + lvm_plugin_header.required_services_version.minor, | |
20400 | + lvm_plugin_header.required_services_version.patchlevel); | |
20401 | + | |
20402 | + /* Count all existing items. */ | |
20403 | + for ( group = lvm_group_list; group; group = group->next_group ) { | |
20404 | + lvs += group->volume_count; | |
20405 | + pvs += group->pv_count; | |
20406 | + vgs++; | |
20407 | + } | |
20408 | + | |
20409 | + PROCPRINT("\n"); | |
20410 | + PROCPRINT("Total: %d VGs %d PVs %d LVs\n", vgs, pvs, lvs); | |
20411 | + | |
20412 | + /* Print out specifics about each VG. */ | |
20413 | + for ( group = lvm_group_list; group; group = group->next_group ) { | |
20414 | + PROCPRINT("\n"); | |
20415 | + PROCPRINT("VG: %s [%d PV, %d LV]\n", | |
20416 | + group->vg_name, group->pv_count, group->volume_count); | |
20417 | + PROCPRINT("PVs:\n"); | |
20418 | + for ( pv_entry = group->pv_list; | |
20419 | + pv_entry; pv_entry = pv_entry->next ) { | |
20420 | + if (pv_entry->logical_node) { | |
20421 | + PROCPRINT("\t%s\t%10Ld KB\n", | |
20422 | + pv_entry->logical_node->name, | |
20423 | + (long long)pv_entry->logical_node->total_vsectors / 2); | |
20424 | + } | |
20425 | + } | |
20426 | + PROCPRINT("LVs:\n"); | |
20427 | + for ( i = 1; i <= MAX_LV; i++ ) { | |
20428 | + if (group->volume_list[i]) { | |
20429 | + volume = group->volume_list[i]; | |
20430 | + PROCPRINT("\t%s\t%10Ld KB / %5d LEs", | |
20431 | + volume->name, | |
20432 | + (long long)volume->lv_size / 2, | |
20433 | + volume->num_le); | |
20434 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
20435 | + PROCPRINT("\tSnapshot of : "); | |
20436 | + if (volume->snapshot_org) { | |
20437 | + PROCPRINT("%s : ", | |
20438 | + volume->snapshot_org->name); | |
20439 | + } else { | |
20440 | + PROCPRINT("(unknown) : "); | |
20441 | + } | |
20442 | + PROCPRINT("%ld%% full : ", | |
20443 | + (long)(volume->next_free_chunk) * | |
20444 | + 100 / (long)(volume->lv_size)); | |
20445 | + if ( volume->lv_status & LV_ACTIVE ) { | |
20446 | + PROCPRINT("active"); | |
20447 | + } else { | |
20448 | + PROCPRINT("disabled"); | |
20449 | + } | |
20450 | + } else if ( volume->lv_access & LV_SNAPSHOT_ORG ) { | |
20451 | + PROCPRINT("\tSnapshotted by : "); | |
20452 | + for ( snap = volume->snapshot_next; | |
20453 | + snap; | |
20454 | + snap = snap->snapshot_next ) { | |
20455 | + PROCPRINT("%s ", snap->name); | |
20456 | + } | |
20457 | + } | |
20458 | + PROCPRINT("\n"); | |
20459 | + } | |
20460 | + } | |
20461 | + } | |
20462 | + | |
20463 | +out: | |
20464 | + *start = page + off; | |
20465 | + sz -= off; | |
20466 | + if (sz < 0) | |
20467 | + sz = 0; | |
20468 | + return sz > count ? count : sz; | |
20469 | +} | |
20470 | + | |
20471 | + | |
20472 | +/********** Required EVMS Plugin Functions **********/ | |
20473 | + | |
20474 | + | |
20475 | +/** | |
20476 | + * lvm_discover | |
20477 | + * | |
20478 | + * This is the entry point into the LVM discovery process. It is a three | |
20479 | + * phase process. First, the list of nodes are examined for PVs, and the | |
20480 | + * appropriate volume groups are created. Then each volume group is | |
20481 | + * examined to find all available logical volumes. Finally, each LVM | |
20482 | + * logical volume has a new EVMS node created for it, and added to the | |
20483 | + * list of nodes. | |
20484 | + **/ | |
20485 | +static int lvm_discover(struct evms_logical_node ** evms_node_list) | |
20486 | +{ | |
20487 | + int rc; | |
20488 | + | |
20489 | + MOD_INC_USE_COUNT; | |
20490 | + LOG_EXTRA("Beginning discovery.\n"); | |
20491 | + | |
20492 | + discover_volume_groups(evms_node_list); | |
20493 | + | |
20494 | + check_volume_groups(); | |
20495 | + | |
20496 | + discover_logical_volumes(FALSE); | |
20497 | + | |
20498 | + check_logical_volumes(FALSE); | |
20499 | + | |
20500 | + rc = export_volumes(evms_node_list, FALSE); | |
20501 | + | |
20502 | + LOG_EXTRA("Discovery complete.\n"); | |
20503 | + MOD_DEC_USE_COUNT; | |
20504 | + return rc; | |
20505 | +} | |
20506 | + | |
20507 | +/** | |
20508 | + * lvm_discover_end | |
20509 | + * | |
20510 | + * The discovery process at the region-manager level is now iterative, | |
20511 | + * much like the EVMS feature level. This allows the ability to stack | |
20512 | + * LVM on top of MD, or vice-versa. To accomplish this correctly, and | |
20513 | + * also to accomplish partial volume discovery, a second discover | |
20514 | + * entry point is needed, so EVMS can tell the region managers that | |
20515 | + * discovery is over, and to finish up any discovery that is not yet | |
20516 | + * complete. When this function is called, it should be assumed that | |
20517 | + * the node list has had nothing new added to it since the last call | |
20518 | + * of the regular discover function. Therefore, when this function is | |
20519 | + * called, we do not need to try to discovery any additional volume | |
20520 | + * groups. We will, however, look for logical volumes once more. This | |
20521 | + * gives us the ability to export (read-only) volumes that have | |
20522 | + * partially corrupted LE maps due to missing PVs in their VG. | |
20523 | + **/ | |
20524 | +static int lvm_discover_end(struct evms_logical_node ** evms_node_list) | |
20525 | +{ | |
20526 | + int rc; | |
20527 | + | |
20528 | + MOD_INC_USE_COUNT; | |
20529 | + LOG_EXTRA("Beginning final discovery\n"); | |
20530 | + | |
20531 | + discover_volume_groups(evms_node_list); | |
20532 | + | |
20533 | + check_volume_groups(); | |
20534 | + | |
20535 | + discover_logical_volumes(TRUE); | |
20536 | + | |
20537 | + check_logical_volumes(TRUE); | |
20538 | + | |
20539 | + rc = export_volumes(evms_node_list, TRUE); | |
20540 | + | |
20541 | + lvm_cleanup(); | |
20542 | + | |
20543 | + LOG_EXTRA("Final discovery complete.\n"); | |
20544 | + MOD_DEC_USE_COUNT; | |
20545 | + return rc; | |
20546 | +} | |
20547 | + | |
20548 | +/** | |
20549 | + * lvm_delete_node | |
20550 | + * | |
20551 | + * This function deletes the in-memory representation of an LVM logical volume. | |
20552 | + **/ | |
20553 | +static int lvm_delete_node(struct evms_logical_node * logical_node) | |
20554 | +{ | |
20555 | + struct lvm_logical_volume * volume = logical_node->private; | |
20556 | + struct lvm_volume_group * group = volume->group; | |
20557 | + | |
20558 | + LOG_DEBUG("Deleting LVM node %s\n", logical_node->name); | |
20559 | + | |
20560 | + if ( deallocate_logical_volume(volume) ) { | |
20561 | + return -EINVAL; | |
20562 | + } | |
20563 | + | |
20564 | + /* If we just removed the last volume from this group, the entire group | |
20565 | + * must also be deleted. | |
20566 | + */ | |
20567 | + if ( group && group->volume_count == 0 ) { | |
20568 | + remove_group_from_list(group); | |
20569 | + deallocate_volume_group(group); | |
20570 | + } | |
20571 | + | |
20572 | + /* Free the logical node. */ | |
20573 | + evms_cs_deallocate_logical_node(logical_node); | |
20574 | + MOD_DEC_USE_COUNT; | |
20575 | + return 0; | |
20576 | +} | |
20577 | + | |
20578 | +/** | |
20579 | + * lvm_read | |
20580 | + **/ | |
20581 | +static void lvm_read(struct evms_logical_node * node, | |
20582 | + struct buffer_head * bh) | |
20583 | +{ | |
20584 | + struct lvm_logical_volume * volume = node->private; | |
20585 | + struct lvm_physical_volume * pv_entry; | |
20586 | + u64 size = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
20587 | + u64 new_sector, new_size, pe_start_sector; | |
20588 | + | |
20589 | + /* If this volume is a snapshot, lock the volume, and do | |
20590 | + * the LE-PE translation on its original volume. | |
20591 | + */ | |
20592 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
20593 | + down(&volume->snap_semaphore); | |
20594 | + if (!volume->snapshot_org) { | |
20595 | + goto out_error; | |
20596 | + } | |
20597 | + node = volume->snapshot_org->volume_node; | |
20598 | + } | |
20599 | + | |
20600 | + /* Make sure the volume is active and readable. */ | |
20601 | + if ( !(volume->lv_access & LV_READ && | |
20602 | + volume->lv_status & LV_ACTIVE) ) { | |
20603 | + goto out_error; | |
20604 | + } | |
20605 | + | |
20606 | + /* Check if I/O goes past end of logical volume. Must use the | |
20607 | + * node, not the volume, so snapshots will work correctly. | |
20608 | + */ | |
20609 | + if ( bh->b_rsector + size > node->total_vsectors ) { | |
20610 | + goto out_error; | |
20611 | + } | |
20612 | + | |
20613 | + /* Logical-to-Physical remapping. Check for incomplete volumes. | |
20614 | + * Check intermediate boundary conditions as well. | |
20615 | + */ | |
20616 | + if ( remap_sector(node, bh->b_rsector, size, &new_sector, | |
20617 | + &new_size, &pe_start_sector, &pv_entry) || | |
20618 | + !pe_start_sector || !pv_entry || | |
20619 | + size != new_size ) { | |
20620 | + goto out_error; | |
20621 | + } | |
20622 | + | |
20623 | + /* For snapshot volumes, check if this sector's chunk has been | |
20624 | + * remapped. If it has, new_sector and pv_entry will be changed | |
20625 | + * accordingly. If not, they remain the same. | |
20626 | + */ | |
20627 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
20628 | + snapshot_remap_sector(volume, pe_start_sector, | |
20629 | + &new_sector, &pv_entry); | |
20630 | + } | |
20631 | + | |
20632 | + bh->b_rsector = new_sector; | |
20633 | + R_IO(pv_entry->logical_node, bh); | |
20634 | + | |
20635 | +out: | |
20636 | + /* Unlock the snapshot. */ | |
20637 | + if ( volume->lv_access & LV_SNAPSHOT ) { | |
20638 | + up(&volume->snap_semaphore); | |
20639 | + } | |
20640 | + return; | |
20641 | + | |
20642 | +out_error: | |
20643 | + bh->b_end_io(bh, 0); | |
20644 | + goto out; | |
20645 | +} | |
20646 | + | |
20647 | +/** | |
20648 | + * lvm_write | |
20649 | + **/ | |
20650 | +static void lvm_write(struct evms_logical_node * node, | |
20651 | + struct buffer_head * bh) | |
20652 | +{ | |
20653 | + struct lvm_logical_volume * volume = node->private; | |
20654 | + struct lvm_logical_volume * snap_volume; | |
20655 | + struct lvm_physical_volume * pv_entry; | |
20656 | + u64 size = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
20657 | + u64 new_sector, new_size, pe_start_sector; | |
20658 | + | |
20659 | + /* Make sure the volume is active and writable. */ | |
20660 | + if ( !(volume->lv_access & LV_WRITE && | |
20661 | + volume->lv_status & LV_ACTIVE) ) { | |
20662 | + goto out_error; | |
20663 | + } | |
20664 | + | |
20665 | + /* Check if I/O goes past end of logical volume. */ | |
20666 | + if ( bh->b_rsector + size > node->total_vsectors ) { | |
20667 | + goto out_error; | |
20668 | + } | |
20669 | + | |
20670 | + /* Logical-to-Physical remapping. Check for incomplete volumes. | |
20671 | + * Check intermediate boundary conditions as well. | |
20672 | + */ | |
20673 | + if ( remap_sector(node, bh->b_rsector, size, &new_sector, | |
20674 | + &new_size, &pe_start_sector, &pv_entry) || | |
20675 | + !pe_start_sector || !pv_entry || | |
20676 | + size != new_size ) { | |
20677 | + goto out_error; | |
20678 | + } | |
20679 | + | |
20680 | + /* Copy-on-write for snapshotting. */ | |
20681 | + if ( volume->lv_access & LV_SNAPSHOT_ORG ) { | |
20682 | + /* Originals can be snapshotted multiple times. */ | |
20683 | + for ( snap_volume = volume->snapshot_next; | |
20684 | + snap_volume; snap_volume = snap_volume->snapshot_next ) { | |
20685 | + if ( snapshot_copy_data(volume, snap_volume, | |
20686 | + pe_start_sector, new_sector, | |
20687 | + pv_entry) ) { | |
20688 | + goto out_error; | |
20689 | + } | |
20690 | + } | |
20691 | + } | |
20692 | + | |
20693 | + bh->b_rsector = new_sector; | |
20694 | + W_IO(pv_entry->logical_node, bh); | |
20695 | +out: | |
20696 | + return; | |
20697 | +out_error: | |
20698 | + bh->b_end_io(bh, 0); | |
20699 | + goto out; | |
20700 | +} | |
20701 | + | |
20702 | +/** | |
20703 | + * lvm_init_io | |
20704 | + * | |
20705 | + * Init_io on a snapshot volume treats it like a regular volume. | |
20706 | + **/ | |
20707 | +static int lvm_init_io(struct evms_logical_node * node, | |
20708 | + int io_flag, | |
20709 | + u64 sect_nr, | |
20710 | + u64 num_sects, | |
20711 | + void * buf_addr) | |
20712 | +{ | |
20713 | + struct lvm_logical_volume * volume = node->private; | |
20714 | + struct lvm_physical_volume * pv_entry; | |
20715 | + u64 pe_start_sector, new_sector, new_size; | |
20716 | + int rc = 0; | |
20717 | + | |
20718 | + /* Only allow internal writes to snapshots (io_flag==4). Disallow | |
20719 | + * writes to snapshot originals. | |
20720 | + */ | |
20721 | + if ( io_flag == WRITE && | |
20722 | + volume->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG) ) { | |
20723 | + return -EINVAL; | |
20724 | + } | |
20725 | + | |
20726 | + /* The node for a snapshot reports the size of the original. If a | |
20727 | + * request comes in in that range, just return. | |
20728 | + */ | |
20729 | + else if ( volume->lv_access & LV_SNAPSHOT && | |
20730 | + sect_nr >= volume->lv_size && | |
20731 | + sect_nr < node->total_vsectors ) { | |
20732 | + if ( io_flag == READ ) { | |
20733 | + memset(buf_addr, 0, | |
20734 | + num_sects << EVMS_VSECTOR_SIZE_SHIFT); | |
20735 | + } | |
20736 | + return 0; | |
20737 | + } | |
20738 | + | |
20739 | + /* Regular range check. */ | |
20740 | + else if ( sect_nr + num_sects > volume->lv_size ) { | |
20741 | + return -EINVAL; | |
20742 | + } | |
20743 | + | |
20744 | + if ( io_flag == 4 ) { | |
20745 | + io_flag = WRITE; | |
20746 | + } | |
20747 | + | |
20748 | + /* Init IO needs to deal with the possibility of a request that spans | |
20749 | + * PEs or stripes. This is possible because there is no limit on | |
20750 | + * num_sects. To handle this, we loop through remap_sector and | |
20751 | + * INIT_IO until num_sects reaches zero. | |
20752 | + */ | |
20753 | + while (num_sects) { | |
20754 | + if ( remap_sector(node, sect_nr, num_sects, &new_sector, | |
20755 | + &new_size, &pe_start_sector, &pv_entry) ) { | |
20756 | + return -EIO; | |
20757 | + } | |
20758 | + | |
20759 | + /* If the volume is incomplete, clear the buffer (on a read). */ | |
20760 | + if (!pe_start_sector || !pv_entry) { | |
20761 | + if ( io_flag == READ ) { | |
20762 | + memset(buf_addr, 0, | |
20763 | + new_size << EVMS_VSECTOR_SIZE_SHIFT); | |
20764 | + } | |
20765 | + } else { | |
20766 | + rc = INIT_IO(pv_entry->logical_node, io_flag, | |
20767 | + new_sector, new_size, buf_addr); | |
20768 | + } | |
20769 | + num_sects -= new_size; | |
20770 | + sect_nr += new_size; | |
20771 | + buf_addr = (void *)(((unsigned long) buf_addr) + | |
20772 | + (unsigned long)(new_size << EVMS_VSECTOR_SIZE_SHIFT)); | |
20773 | + } | |
20774 | + | |
20775 | + return rc; | |
20776 | +} | |
20777 | + | |
20778 | +/** | |
20779 | + * lvm_ioctl - ioctl entry point for an LVM volume node: reports a fixed 64-head/32-sector geometry, sets or clears the quiesce flag, forwards bmap requests to the PV returned by lvm_get_bmap() and broadcasts disk-level ioctls to every PV in the group. Returns 0 on success, otherwise a negative errno (broadcast results are OR-ed together); unknown commands yield -EINVAL. | |
20780 | + **/ | |
20781 | +static int lvm_ioctl(struct evms_logical_node * logical_node, | |
20782 | + struct inode * inode, | |
20783 | + struct file * file, | |
20784 | + unsigned int cmd, | |
20785 | + unsigned long arg) | |
20786 | +{ | |
20787 | + struct lvm_logical_volume * volume = logical_node->private; | |
20788 | + int rc = 0; | |
20789 | + | |
20790 | + LOG_ENTRY_EXIT("Ioctl %d\n", cmd); | |
20791 | + | |
20792 | + switch (cmd) { | |
20793 | + | |
20794 | + case HDIO_GETGEO: | |
20795 | + { | |
20796 | + /* Fixed geometry for all LVM volumes. */ | |
20797 | + unsigned char heads = 64; | |
20798 | + unsigned char sectors = 32; | |
20799 | + short cylinders; | |
20800 | + long start = 0; | |
20801 | + struct hd_geometry * hd = (struct hd_geometry *)arg; | |
20802 | + /* Divide before narrowing: storing the sector count in a short first discarded its high bits, so the result never exceeded 15. The cast keeps the division at native word width. */ | |
20803 | + cylinders = ((unsigned long)logical_node->total_vsectors / heads) / sectors; | |
20804 | + | |
20805 | + if (!hd) { | |
20806 | + return -EINVAL; | |
20807 | + } | |
20808 | + | |
20809 | + if ( copy_to_user((char *)(&hd->heads), | |
20810 | + &heads, sizeof(heads)) || | |
20811 | + copy_to_user((char *)(&hd->sectors), | |
20812 | + &sectors, sizeof(sectors)) || | |
20813 | + copy_to_user((short *)(&hd->cylinders), | |
20814 | + &cylinders, sizeof(cylinders)) || | |
20815 | + copy_to_user((long *)(&hd->start), | |
20816 | + &start, sizeof(start)) ) { | |
20817 | + return -EFAULT; | |
20818 | + } | |
20819 | + } | |
20820 | + break; | |
20821 | + | |
20822 | + case EVMS_QUIESCE_VOLUME: | |
20823 | + { | |
20824 | + struct evms_quiesce_vol_pkt * tmp = | |
20825 | + (struct evms_quiesce_vol_pkt *)arg; | |
20826 | + if (tmp->command) { /* NOTE(review): arg is dereferenced directly - presumably already a kernel-space packet at this layer; confirm */ | |
20827 | + volume->lv_access |= EVMS_LV_QUIESCED; | |
20828 | + } else { | |
20829 | + volume->lv_access &= ~EVMS_LV_QUIESCED; | |
20830 | + } | |
20831 | + } | |
20832 | + break; | |
20833 | + | |
20834 | + case EVMS_GET_BMAP: | |
20835 | + { | |
20836 | + struct evms_get_bmap_pkt * bmap = | |
20837 | + (struct evms_get_bmap_pkt *)arg; | |
20838 | + struct evms_logical_node * pv_node; | |
20839 | + | |
20840 | + rc = lvm_get_bmap(logical_node, bmap, &pv_node); /* on success pv_node is the PV node the request is passed on to */ | |
20841 | + if (!rc) { | |
20842 | + rc = IOCTL(pv_node, inode, file, cmd, | |
20843 | + (unsigned long) bmap); | |
20844 | + } | |
20845 | + } | |
20846 | + break; | |
20847 | + | |
20848 | + case EVMS_GET_DISK_LIST: | |
20849 | + case EVMS_CHECK_MEDIA_CHANGE: | |
20850 | + case EVMS_REVALIDATE_DISK: | |
20851 | + case EVMS_OPEN_VOLUME: | |
20852 | + case EVMS_CLOSE_VOLUME: | |
20853 | + case EVMS_CHECK_DEVICE_STATUS: | |
20854 | + { | |
20855 | + /* These six ioctls all need to | |
20856 | + * be broadcast to all PVs in the volume's group. | |
20857 | + */ | |
20858 | + struct lvm_volume_group * group = volume->group; | |
20859 | + struct lvm_physical_volume * pv_entry; | |
20860 | + for ( pv_entry = group->pv_list; | |
20861 | + pv_entry; pv_entry = pv_entry->next ) { | |
20862 | + rc |= IOCTL(pv_entry->logical_node, inode, | |
20863 | + file, cmd, arg); | |
20864 | + } | |
20865 | + } | |
20866 | + break; | |
20867 | + | |
20868 | + default: | |
20869 | + /* Currently LVM does not send any other ioctls down to the | |
20870 | + * PVs. Which PV would they go to? What would we do with | |
20871 | + * the return codes? | |
20872 | + */ | |
20873 | + rc = -EINVAL; | |
20874 | + } | |
20875 | + | |
20876 | + return rc; | |
20877 | +} | |
20878 | + | |
20879 | +/** | |
20880 | + * lvm_direct_ioctl | |
20881 | + * | |
20882 | + * This function provides a method for user-space to communicate directly | |
20883 | + * with a plugin in the kernel. @args is a user pointer to a struct evms_plugin_ioctl_pkt; the result is stored in its status field and the packet is copied back. Returns 0 or a negative errno: -EFAULT when a user copy fails, -EINVAL for a foreign feature_id or an unknown command, otherwise whatever the command handler returned. | |
20884 | + **/ | |
20885 | +static int lvm_direct_ioctl(struct inode * inode, | |
20886 | + struct file * file, | |
20887 | + unsigned int cmd, | |
20888 | + unsigned long args) | |
20889 | +{ | |
20890 | + struct evms_plugin_ioctl_pkt pkt, * user_pkt; | |
20891 | + struct lvm_pv_remove_ioctl pv_remove, * user_pv_remove; | |
20892 | + struct lvm_snapshot_stat_ioctl snap_stats, * user_snap_stats; | |
20893 | + int rc = 0; | |
20894 | + | |
20895 | + MOD_INC_USE_COUNT; | |
20896 | + | |
20897 | + user_pkt = (struct evms_plugin_ioctl_pkt *)args; | |
20898 | + | |
20899 | + /* Copy user's parameters to kernel space. */ | |
20900 | + if ( copy_from_user(&pkt, user_pkt, sizeof(pkt)) ) { | |
20901 | + MOD_DEC_USE_COUNT; | |
20902 | + return -EFAULT; | |
20903 | + } | |
20904 | + | |
20905 | + /* Make sure this is supposed to be our ioctl. */ | |
20906 | + if ( pkt.feature_id != lvm_plugin_header.id ) { | |
20907 | + MOD_DEC_USE_COUNT; | |
20908 | + return -EINVAL; | |
20909 | + } | |
20910 | + | |
20911 | + switch (pkt.feature_command) { | |
20912 | + | |
20913 | + case EVMS_LVM_PV_REMOVE_IOCTL: | |
20914 | + user_pv_remove = | |
20915 | + (struct lvm_pv_remove_ioctl *)pkt.feature_ioctl_data; | |
20916 | + if ( copy_from_user(&pv_remove, user_pv_remove, | |
20917 | + sizeof(pv_remove)) ) { | |
20918 | + rc = -EFAULT; /* bad user pointer, same code as the packet copy above */ | |
20919 | + break; | |
20920 | + } | |
20921 | + rc = remove_pv_from_group(pv_remove.pv_number, | |
20922 | + pv_remove.vg_uuid); | |
20923 | + break; | |
20924 | + | |
20925 | + case EVMS_LVM_SNAPSHOT_STAT_IOCTL: | |
20926 | + user_snap_stats = | |
20927 | + (struct lvm_snapshot_stat_ioctl *)pkt.feature_ioctl_data; | |
20928 | + if ( copy_from_user(&snap_stats, user_snap_stats, | |
20929 | + sizeof(snap_stats)) ) { | |
20930 | + rc = -EFAULT; | |
20931 | + break; | |
20932 | + } | |
20933 | + rc = get_snapshot_stats(&snap_stats); | |
20934 | + if ( copy_to_user(user_snap_stats, &snap_stats, | |
20935 | + sizeof(snap_stats)) ) { | |
20936 | + rc = -EFAULT; /* the stats never reached the caller; this overrides the handler's result */ | |
20937 | + break; | |
20938 | + } | |
20939 | + break; | |
20940 | + | |
20941 | + default: | |
20942 | + rc = -EINVAL; | |
20943 | + break; | |
20944 | + } | |
20945 | + | |
20946 | + pkt.status = rc; | |
20947 | + if ( copy_to_user(user_pkt, &pkt, sizeof(pkt)) && !rc ) { rc = -EFAULT; } /* do not report success if the status could not be delivered; an earlier error is kept */ | |
20948 | + MOD_DEC_USE_COUNT; | |
20949 | + return rc; | |
20950 | +} | |
20951 | + | |
20952 | +/** | |
20953 | + * lvm_vge_init - module init: clears the group list and proc pointer, creates the plugin's proc-fs directory plus its global read entry under the EVMS proc directory (when one is available), then registers the plugin. Returns the result of evms_cs_register_plugin(). | |
20954 | + **/ | |
20955 | +int __init lvm_vge_init(void) | |
20956 | +{ | |
20957 | + struct proc_dir_entry *pde; | |
20958 | + | |
20959 | + lvm_group_list = NULL; | |
20960 | + lvm_proc = NULL; | |
20961 | + | |
20962 | + /* Register the global proc-fs entries. Failure is not treated as fatal: lvm_proc simply stays NULL. */ | |
20963 | + pde = evms_cs_get_evms_proc_dir(); | |
20964 | + if (pde) { | |
20965 | + lvm_proc = create_proc_entry(LVM_PROC_NAME, S_IFDIR, pde); | |
20966 | + if (lvm_proc) { | |
20967 | + create_proc_read_entry(LVM_PROC_GLOBAL_NAME, S_IFREG, | |
20968 | + lvm_proc, lvm_global_proc_read, | |
20969 | + NULL); | |
20970 | + } | |
20971 | + } | |
20972 | + | |
20973 | + /* Register this plugin with EVMS. NOTE(review): the proc entries created above are not removed here if registration fails - confirm that is acceptable. */ | |
20974 | + return evms_cs_register_plugin(&lvm_plugin_header); | |
20975 | +} | |
20976 | + | |
20977 | +/** | |
20978 | + * lvm_vge_exit - module exit: deletes any volumes still attached to leftover groups, removes the proc-fs entries created by lvm_vge_init() and unregisters the plugin. | |
20979 | + **/ | |
20980 | +void __exit lvm_vge_exit(void) | |
20981 | +{ | |
20982 | + struct lvm_volume_group * group, * next_group; | |
20983 | + struct proc_dir_entry * pde; | |
20984 | + int i; | |
20985 | + | |
20986 | + /* If LVM is called for module_exit, that means the reference | |
20987 | + * count must be zero, which means there should be no volumes, | |
20988 | + * and thus no volume groups. But, check anyway and delete | |
20989 | + * any volumes and groups that are still hanging around. | |
20990 | + */ | |
20991 | + if (lvm_group_list) { | |
20992 | + LOG_SERIOUS("Called for module_exit, but group list is not empty!\n"); | |
20993 | + } | |
20994 | + | |
20995 | + for ( group = lvm_group_list; group; group = next_group ) { | |
20996 | + next_group = group->next_group; /* fetched first - presumably because deleting the volumes may release the group; verify */ | |
20997 | + | |
20998 | + LOG_SERIOUS("In module_exit: deleting all volumes from group %s.\n", | |
20999 | + group->vg_name); | |
21000 | + | |
21001 | + for ( i = 1; i <= MAX_LV; i++ ) { /* NOTE(review): assumes volume_list[] has MAX_LV + 1 slots and that group stays valid across lvm_delete_node() - confirm */ | |
21002 | + if (group->volume_list[i]) { | |
21003 | + lvm_delete_node(group->volume_list[i]->volume_node); | |
21004 | + } | |
21005 | + } | |
21006 | + } | |
21007 | + | |
21008 | + /* Unregister the proc-fs entries, but only if init actually created them: lvm_proc is NULL otherwise and must not be passed on as a parent directory. */ | |
21009 | + pde = evms_cs_get_evms_proc_dir(); | |
21010 | + if (pde && lvm_proc) { | |
21011 | + remove_proc_entry(LVM_PROC_GLOBAL_NAME, lvm_proc); | |
21012 | + remove_proc_entry(LVM_PROC_NAME, pde); | |
21013 | + } | |
21014 | + | |
21015 | + /* Unregister this plugin from EVMS. */ | |
21016 | + evms_cs_unregister_plugin(&lvm_plugin_header); | |
21017 | +} | |
21018 | + | |
21019 | +module_init(lvm_vge_init); /* entry point run at load/boot */ | |
21020 | +module_exit(lvm_vge_exit); /* entry point run at unload */ | |
21021 | +#ifdef MODULE_LICENSE /* only used when the kernel headers provide the macro */ | |
21022 | +MODULE_LICENSE("GPL"); | |
21023 | +#endif /* MODULE_LICENSE */ | |
21024 | + | |
21025 | diff -Naur linux-2002-09-30/drivers/evms/md_core.c evms-2002-09-30/drivers/evms/md_core.c | |
21026 | --- linux-2002-09-30/drivers/evms/md_core.c Wed Dec 31 18:00:00 1969 | |
21027 | +++ evms-2002-09-30/drivers/evms/md_core.c Sun Sep 29 23:25:48 2002 | |
21028 | @@ -0,0 +1,3633 @@ | |
21029 | +/* | |
21030 | + * Copyright (c) International Business Machines Corp., 2000 | |
21031 | + * | |
21032 | + * This program is free software; you can redistribute it and/or modify | |
21033 | + * it under the terms of the GNU General Public License as published by | |
21034 | + * the Free Software Foundation; either version 2 of the License, or | |
21035 | + * (at your option) any later version. | |
21036 | + * | |
21037 | + * This program is distributed in the hope that it will be useful, | |
21038 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21039 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | |
21040 | + * the GNU General Public License for more details. | |
21041 | + * | |
21042 | + * You should have received a copy of the GNU General Public License | |
21043 | + * along with this program; if not, write to the Free Software | |
21044 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
21045 | + * | |
21046 | + * | |
21047 | + * linux/drivers/evms/md_core.c | |
21048 | + * | |
21049 | + * EVMS Linux MD Region Manager | |
21050 | + * | |
21051 | + */ | |
21052 | + | |
21053 | + | |
21054 | +#include <linux/module.h> | |
21055 | +#include <linux/kmod.h> | |
21056 | +#include <linux/kernel.h> | |
21057 | +#include <linux/config.h> | |
21058 | +#include <linux/genhd.h> | |
21059 | +#include <linux/string.h> | |
21060 | +#include <linux/blk.h> | |
21061 | +#include <linux/init.h> | |
21062 | +#include <linux/slab.h> | |
21063 | +#include <linux/vmalloc.h> | |
21064 | +#include <linux/evms/evms.h> | |
21065 | +#include <linux/evms/evms_md.h> | |
21066 | +#include <linux/sysctl.h> | |
21067 | +#include <asm/system.h> | |
21068 | +#include <asm/uaccess.h> | |
21069 | + | |
21070 | +#define LOG_PREFIX "md core: " | |
21071 | + | |
21072 | +/* | |
21073 | + * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | |
21074 | + * is 100 KB/sec, so the extra system load does not show up that much. | |
21075 | + * Increase it if you want to have more _guaranteed_ speed. Note that | |
21076 | + * the RAID driver will use the maximum available bandwidth if the IO | |
21077 | + * subsystem is idle. There is also an 'absolute maximum' reconstruction | |
21078 | + * speed limit - in case reconstruction slows down your system despite | |
21079 | + * idle IO detection. | |
21080 | + * | |
21081 | + * You can change both limits via /proc/sys/dev/evms/md/speed_limit_min and _max. | |
21082 | + */ | |
21083 | + | |
21084 | +static MD_LIST_HEAD(all_raid_disks); | |
21085 | +static MD_LIST_HEAD(pending_raid_disks); | |
21086 | + | |
21087 | +static int sysctl_speed_limit_min = 100; /* backs the "speed_limit_min" sysctl entry */ | |
21088 | +static int sysctl_speed_limit_max = 100000; /* backs the "speed_limit_max" sysctl entry */ | |
21089 | + | |
21090 | + | |
21091 | +static mdk_personality_t *pers[MAX_PERSONALITY]; /* presumably the registered personalities, one slot per personality number - verify */ | |
21092 | + | |
21093 | +static int md_blocksizes[MAX_MD_DEVS]; | |
21094 | +static int md_hardsect_sizes[MAX_MD_DEVS]; | |
21095 | +int evms_md_size[MAX_MD_DEVS]; /* not static: visible to other EVMS MD source files */ | |
21096 | +static struct evms_thread *evms_md_recovery_thread = NULL; | |
21097 | + | |
21098 | +/* | |
21099 | + * These lists make it possible to iterate over all existing md arrays | |
21100 | + */ | |
21101 | +static LIST_HEAD(all_mddevs); | |
21102 | +static LIST_HEAD(incomplete_mddevs); | |
21103 | +static LIST_HEAD(running_mddevs); | |
21104 | + | |
21105 | +/* | |
21106 | + * The mapping between kdev and mddev is not necessarily a simple | |
21107 | + * one! E.g. HSM uses several sub-devices to implement Logical | |
21108 | + * Volumes. All these sub-devices map to the same mddev. | |
21109 | + */ | |
21110 | +struct dev_mapping evms_mddev_map[MAX_MD_DEVS]; | |
21111 | + | |
21112 | + | |
21113 | +/* Support functions for discovery: forward declarations of static helpers used before their definitions */ | |
21114 | +static mdk_rdev_t * evms_md_find_rdev_all (struct evms_logical_node *node); | |
21115 | +static mddev_t * evms_md_find_mddev_all (struct evms_logical_node *node); | |
21116 | +static int evms_md_import_device (struct evms_logical_node **discover_list, | |
21117 | + struct evms_logical_node *node); | |
21118 | +static void evms_md_autostart_arrays(struct evms_logical_node **discover_list); | |
21119 | +static void evms_md_run_devices (struct evms_logical_node **discover_list); | |
21120 | +static int evms_md_run_array (struct evms_logical_node ** discover_list, | |
21121 | + mddev_t *mddev); | |
21122 | +static void evms_md_run_incomplete_array (struct evms_logical_node ** discover_list, | |
21123 | + mddev_t *mddev); | |
21124 | +static int evms_md_create_logical_node(struct evms_logical_node **discover_list, | |
21125 | + mddev_t *mddev, uint flags); | |
21126 | +static int evms_md_read_disk_sb (mdk_rdev_t * rdev); | |
21127 | +static int evms_md_analyze_sbs (mddev_t * mddev); | |
21128 | +static mddev_t * alloc_mddev (kdev_t dev); | |
21129 | +static void free_mddev(mddev_t * mddev); | |
21130 | +static void evms_md_create_recovery_thread(void); | |
21131 | +static void evms_md_destroy_recovery_thread(void); | |
21132 | +static int do_md_run (mddev_t * mddev); | |
21133 | +static int do_md_stop (mddev_t * mddev, int ro); | |
21134 | + | |
21135 | +static void evms_md_export_rdev (mdk_rdev_t * rdev, int delete_node); /* delete_node is passed as TRUE/FALSE by md_end_discover() */ | |
21136 | +static void kick_rdev_from_array (mdk_rdev_t * rdev); | |
21137 | +static mdp_disk_t *evms_md_find_disk(mddev_t *mddev, kdev_t dev); | |
21138 | +static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb); | |
21139 | + | |
21140 | +/* Plugin API prototypes: the entry points installed in md_fops below */ | |
21141 | +static int md_discover( struct evms_logical_node ** discover_list ); | |
21142 | +static int md_end_discover( struct evms_logical_node ** discover_list ); | |
21143 | +static int md_delete( struct evms_logical_node * node); | |
21144 | +static void md_read( struct evms_logical_node * node, | |
21145 | + struct buffer_head * bh); | |
21146 | +static void md_write( struct evms_logical_node * node, | |
21147 | + struct buffer_head * bh); | |
21148 | +static int md_sync_io( struct evms_logical_node *node, | |
21149 | + int rw, | |
21150 | + u64 sect_nr, | |
21151 | + u64 num_sects, | |
21152 | + void *data); | |
21153 | +static int md_ioctl( struct evms_logical_node *node, | |
21154 | + struct inode *inode, | |
21155 | + struct file *file, | |
21156 | + unsigned int cmd, | |
21157 | + unsigned long arg); | |
21158 | +static int md_ioctl_cmd_broadcast( | |
21159 | + struct evms_logical_node *node, | |
21160 | + struct inode *inode, | |
21161 | + struct file *file, | |
21162 | + unsigned long cmd, /* NOTE(review): unsigned long here, but md_ioctl() receives cmd as unsigned int */ | |
21163 | + unsigned long arg); | |
21164 | + | |
21165 | +static int md_direct_ioctl( | |
21166 | + struct inode *inode, | |
21167 | + struct file *file, | |
21168 | + unsigned int cmd, | |
21169 | + unsigned long arg); | |
21170 | + | |
21171 | +/* global MD data structures: the operation table (md_fops) and the plugin header that points to it */ | |
21172 | +static struct evms_plugin_fops md_fops = { | |
21173 | + .discover = md_discover, | |
21174 | + .end_discover = md_end_discover, | |
21175 | + .delete = md_delete, | |
21176 | + .read = md_read, | |
21177 | + .write = md_write, | |
21178 | + .init_io = md_sync_io, /* synchronous I/O entry point */ | |
21179 | + .ioctl = md_ioctl, | |
21180 | + .direct_ioctl = md_direct_ioctl | |
21181 | +}; | |
21182 | + | |
21183 | +static struct evms_plugin_header md_plugin_header = { | |
21184 | + .id = SetPluginID(IBM_OEM_ID, | |
21185 | + EVMS_REGION_MANAGER, | |
21186 | + EVMS_MD_ID), | |
21187 | + .version = { | |
21188 | + .major = EVMS_MD_MAJOR_VERSION, | |
21189 | + .minor = EVMS_MD_MINOR_VERSION, | |
21190 | + .patchlevel = EVMS_MD_PATCHLEVEL_VERSION | |
21191 | + }, | |
21192 | + .required_services_version = { /* common-services level this plugin asks for */ | |
21193 | + .major = EVMS_MD_COMMON_SERVICES_MAJOR, | |
21194 | + .minor = EVMS_MD_COMMON_SERVICES_MINOR, | |
21195 | + .patchlevel = EVMS_MD_COMMON_SERVICES_PATCHLEVEL | |
21196 | + }, | |
21197 | + .fops = &md_fops | |
21198 | +}; | |
21199 | + | |
21200 | +/* global variables used while a discovery pass is running */ | |
21201 | +static int exported_nodes; /* total # of exported devices | |
21202 | + * produced during this discovery; reset to 0 by md_discover(). | |
21203 | + */ | |
21204 | +static struct evms_logical_node **cur_discover_list = NULL; /* set by md_discover() for the duration of its pass, NULL otherwise */ | |
21205 | + | |
21206 | +/**********************************************************/ | |
21207 | +/* SYSCTL - EVMS/RAID folder */ | |
21208 | +/**********************************************************/ | |
21209 | + | |
21210 | +#ifdef CONFIG_PROC_FS /* the sysctl tables below are only built with proc-fs support */ | |
21211 | +static struct ctl_table_header *md_table_header; | |
21212 | + | |
21213 | +static ctl_table md_table[] = { /* leaf entries: two int values, mode 0644, handled by proc_dointvec */ | |
21214 | + {DEV_EVMS_MD_SPEED_LIMIT_MIN, "speed_limit_min", | |
21215 | + &sysctl_speed_limit_min, sizeof(int), 0644, NULL, &proc_dointvec}, | |
21216 | + {DEV_EVMS_MD_SPEED_LIMIT_MAX, "speed_limit_max", | |
21217 | + &sysctl_speed_limit_max, sizeof(int), 0644, NULL, &proc_dointvec}, | |
21218 | + {0} | |
21219 | +}; | |
21220 | + | |
21221 | +static ctl_table md_dir_table[] = { /* "md" directory holding md_table */ | |
21222 | + {DEV_EVMS_MD, "md", NULL, 0, 0555, md_table}, | |
21223 | + {0} | |
21224 | +}; | |
21225 | + | |
21226 | +static ctl_table evms_dir_table[] = { /* "evms" directory holding md_dir_table */ | |
21227 | + {DEV_EVMS, "evms", NULL, 0, 0555, md_dir_table}, | |
21228 | + {0} | |
21229 | +}; | |
21230 | + | |
21231 | +static ctl_table dev_dir_table[] = { /* root of the chain: dev -> evms -> md -> speed_limit_{min,max} */ | |
21232 | + {CTL_DEV, "dev", NULL, 0, 0555, evms_dir_table}, | |
21233 | + {0} | |
21234 | +}; | |
21235 | +#endif /* CONFIG_PROC_FS */ | |
21236 | +/********** Required EVMS Plugin Functions **********/ | |
21237 | + | |
21238 | +/* | |
21239 | + * Function: md_discover - run one discovery pass over @discover_list via evms_md_autostart_arrays() and return the value of exported_nodes afterwards. | |
21240 | + * We should only export complete MD device nodes | |
21241 | + */ | |
21242 | +static int md_discover( struct evms_logical_node ** discover_list ) | |
21243 | +{ | |
21244 | + MOD_INC_USE_COUNT; | |
21245 | + LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__); | |
21246 | + | |
21247 | + /* initialize the per-pass globals; exported_nodes is presumably bumped by the helpers as nodes are exported - verify */ | |
21248 | + exported_nodes = 0; | |
21249 | + cur_discover_list = discover_list; | |
21250 | + evms_md_autostart_arrays(discover_list); | |
21251 | + | |
21252 | + LOG_ENTRY_EXIT("%s: EXIT (exported nodes: %d)\n", __FUNCTION__,exported_nodes); | |
21253 | + cur_discover_list = NULL; /* the list pointer is only valid during the pass */ | |
21254 | + MOD_DEC_USE_COUNT; | |
21255 | + return(exported_nodes); | |
21256 | +} | |
21257 | + | |
21258 | +static mddev_t * evms_md_find_incomplete_array(int level) | |
21259 | +{ /* Returns the first mddev visited by ITERATE_INCOMPLETE_MDDEV that has a member whose superblock level equals @level, or NULL if none matches. */ | |
21260 | + mddev_t *mddev; | |
21261 | + struct list_head *tmp,*tmp2; | |
21262 | + mdk_rdev_t *rdev; | |
21263 | + | |
21264 | + ITERATE_INCOMPLETE_MDDEV(mddev,tmp) { /* presumably walks the incomplete_mddevs list - verify against the macro */ | |
21265 | + ITERATE_RDEV(mddev, rdev, tmp2) { | |
21266 | + if (rdev->sb && rdev->sb->level == level) /* members without a loaded superblock are skipped */ | |
21267 | + return mddev; | |
21268 | + } | |
21269 | + } | |
21270 | + return NULL; | |
21271 | +} | |
21272 | + | |
21273 | +/* | |
21274 | + * Function: md_end_discover - final discovery step: runs md_discover() once more, starts the remaining incomplete arrays (levels 5, 1, 0, then -1), frees every mddev that still has no personality and exports every rdev that belongs to no array. Returns the value md_discover() returned. | |
21275 | + */ | |
21276 | +static int md_end_discover( struct evms_logical_node ** discover_list ) | |
21277 | +{ | |
21278 | + int rc = 0; | |
21279 | + struct list_head *tmp; | |
21280 | + mdk_rdev_t *rdev; | |
21281 | + mddev_t *mddev; | |
21282 | + struct evms_logical_node *node; | |
21283 | + int done = FALSE; | |
21284 | + | |
21285 | + MOD_INC_USE_COUNT; | |
21286 | + LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__); | |
21287 | + rc = md_discover(discover_list); /* NOTE(review): rc is not updated by the steps below - confirm that nodes exported later need not be counted */ | |
21288 | + | |
21289 | + do { /* each iteration starts at most one array, highest-priority level first, then rescans from the top */ | |
21290 | + done = TRUE; | |
21291 | + if ( (mddev = evms_md_find_incomplete_array(5)) != NULL) { | |
21292 | + evms_md_run_incomplete_array(discover_list, mddev); | |
21293 | + done = FALSE; | |
21294 | + continue; | |
21295 | + } | |
21296 | + if ( (mddev = evms_md_find_incomplete_array(1)) != NULL) { | |
21297 | + evms_md_run_incomplete_array(discover_list, mddev); | |
21298 | + done = FALSE; | |
21299 | + continue; | |
21300 | + } | |
21301 | + if ( (mddev = evms_md_find_incomplete_array(0)) != NULL) { | |
21302 | + evms_md_run_incomplete_array(discover_list, mddev); | |
21303 | + done = FALSE; | |
21304 | + continue; | |
21305 | + } | |
21306 | + if ( (mddev = evms_md_find_incomplete_array(-1)) != NULL) { /* level -1: presumably the linear personality - verify */ | |
21307 | + evms_md_run_incomplete_array(discover_list, mddev); | |
21308 | + done = FALSE; | |
21309 | + continue; | |
21310 | + } | |
21311 | + | |
21312 | + } while (!done); /* ends once a full scan finds no incomplete array at any of the four levels */ | |
21313 | + | |
21314 | + | |
21315 | + /* | |
21316 | + * At this point, delete all mddevs which did not start (no personality attached). | |
21317 | + */ | |
21318 | + ITERATE_MDDEV(mddev,tmp) { /* NOTE(review): entries are freed while iterating - relies on the macro advancing before the body runs; confirm */ | |
21319 | + if (mddev->pers == NULL) { | |
21320 | + LOG_WARNING("%s: deleting md%d\n", __FUNCTION__, mdidx(mddev)); | |
21321 | + free_mddev(mddev); | |
21322 | + } | |
21323 | + } | |
21324 | + | |
21325 | + | |
21326 | + /* | |
21327 | + * At this point, delete all rdevs which do not belong to any of the discovered MD arrays. | |
21328 | + */ | |
21329 | + ITERATE_RDEV_ALL(rdev, tmp) { | |
21330 | + if (!rdev->mddev) { | |
21331 | + node = rdev->node; | |
21332 | + if (node) { | |
21333 | + if (node->plugin->id == md_plugin_header.id) /* node produced by this plugin: export without deleting it */ | |
21334 | + evms_md_export_rdev(rdev, FALSE); | |
21335 | + else | |
21336 | + evms_md_export_rdev(rdev, TRUE); | |
21337 | + } | |
21338 | + } | |
21339 | + } | |
21340 | + | |
21341 | + LOG_ENTRY_EXIT("%s: EXIT\n", __FUNCTION__); | |
21342 | + MOD_DEC_USE_COUNT; | |
21343 | + return rc; | |
21344 | +} | |
21345 | + | |
21346 | + | |
21347 | +/* | |
21348 | + * Function: md_delete - stop the array behind @node (if any), free the node's private evms_md data together with its per-instance fops, and deallocate the node. Always returns 0. | |
21349 | + */ | |
21350 | +static int md_delete( struct evms_logical_node * node) | |
21351 | +{ | |
21352 | + struct evms_md *evms_md; | |
21353 | + mddev_t *mddev; | |
21354 | + | |
21355 | + evms_md = node->private; | |
21356 | + mddev = evms_md ? evms_md->mddev : NULL; /* evms_md is NULL-checked further down, so it must not be dereferenced unguarded here */ | |
21357 | + LOG_DEFAULT("md_delete() [%s]\n", evms_md_partition_name(node)); | |
21358 | + | |
21359 | + if (mddev) | |
21360 | + do_md_stop(mddev,0); | |
21361 | + if (evms_md) { | |
21362 | + if (evms_md->instance_plugin_hdr.fops) | |
21363 | + kfree(evms_md->instance_plugin_hdr.fops); | |
21364 | + kfree(evms_md); | |
21365 | + } | |
21366 | + | |
21367 | + evms_cs_deallocate_logical_node(node); | |
21368 | + return 0; | |
21369 | +} | |
21370 | + | |
21371 | + | |
21372 | +/* | |
21373 | + * Function: md_read - hand a read request to the array's personality; a request that cannot be dispatched is completed with an error instead of being dropped. | |
21374 | + */ | |
21375 | +static void md_read( struct evms_logical_node * node, | |
21376 | + struct buffer_head * bh) | |
21377 | +{ | |
21378 | + struct evms_md *evms_md = node->private; | |
21379 | + mddev_t *mddev = evms_md->mddev; | |
21380 | + | |
21381 | + if (evms_md_check_boundary(node, bh)) return; /* presumably the helper has already completed the bh on failure - verify */ | |
21382 | + if (mddev && mddev->pers) | |
21383 | + mddev->pers->read(node, bh); | |
21384 | + else | |
21385 | + bh->b_end_io(bh, 0); /* no array/personality: fail the request so a waiter is not left hanging */ | |
21386 | +} | |
21387 | + | |
21388 | + | |
21389 | +/* | |
21390 | + * Function: md_write - hand a write request to the array's personality. Writes to a read-only array, or to a node without an array/personality, are completed with an error. | |
21391 | + */ | |
21392 | +static void md_write( struct evms_logical_node * node, | |
21393 | + struct buffer_head * bh) | |
21394 | +{ | |
21395 | + struct evms_md *evms_md = node->private; | |
21396 | + mddev_t *mddev = evms_md->mddev; | |
21397 | + | |
21398 | + if (evms_md_check_boundary(node, bh)) return; /* presumably the helper has already completed the bh on failure - verify */ | |
21399 | + if (mddev && mddev->ro) { /* test mddev before reading ->ro; the NULL check used to come only afterwards */ | |
21400 | + LOG_ERROR("%s: read-only is set for [%s]\n", __FUNCTION__, node->name); | |
21401 | + bh->b_end_io(bh, 0); | |
21402 | + return; | |
21403 | + } | |
21404 | + if (mddev && mddev->pers) | |
21405 | + mddev->pers->write(node, bh); | |
21406 | + else | |
21407 | + bh->b_end_io(bh, 0); /* no array/personality: fail the request so a waiter is not left hanging */ | |
21408 | +} | |
21409 | + | |
21410 | +/* | |
21411 | + * Function: md_sync_io - synchronous read/write of @num_sects sectors starting at @sect_nr. Returns 0 on success or -EINVAL for an out-of-range request, a write to a read-only array, or a node without an array/personality; otherwise the result of the I/O routine. | |
21412 | + */ | |
21413 | +static int md_sync_io( | |
21414 | + struct evms_logical_node *node, | |
21415 | + int rw, | |
21416 | + u64 sect_nr, | |
21417 | + u64 num_sects, | |
21418 | + void *buf_addr) | |
21419 | +{ | |
21420 | + struct evms_md *evms_md; | |
21421 | + mddev_t *mddev; | |
21422 | + int rc = 0; | |
21423 | + | |
21424 | + evms_md = node->private; | |
21425 | + mddev = evms_md->mddev; | |
21426 | + | |
21427 | + if (num_sects > node->total_vsectors || sect_nr > node->total_vsectors - num_sects) { /* same test as sect_nr + num_sects > total, but immune to u64 wrap-around */ | |
21428 | + LOG_ERROR("%s: attempt to %s beyond MD device(%s) boundary("PFU64") with sect_nr("PFU64") and num_sects("PFU64")\n", | |
21429 | + __FUNCTION__, | |
21430 | + rw ? "WRITE" : "READ", | |
21431 | + node->name, | |
21432 | + node->total_vsectors, | |
21433 | + sect_nr,num_sects); | |
21434 | + rc = -EINVAL; | |
21435 | + } | |
21436 | + | |
21437 | + if (mddev && (mddev->ro) && (rw != READ)) { /* mddev may be NULL (it is tested below), so check it before reading ->ro */ | |
21438 | + LOG_ERROR("%s: read-only is set for [%s]\n", __FUNCTION__, node->name); | |
21439 | + return -EINVAL; | |
21440 | + } | |
21441 | + | |
21442 | + if (!rc && mddev && mddev->pers) { | |
21443 | + /* | |
21444 | + * Check if the personality can handle synchronous I/O, | |
21445 | + * otherwise use the generic function. | |
21446 | + */ | |
21447 | + if (mddev->pers->sync_io) | |
21448 | + rc = mddev->pers->sync_io(mddev, rw, sect_nr, num_sects, buf_addr); | |
21449 | + else | |
21450 | + rc = evms_md_sync_io(node, rw, sect_nr, num_sects, buf_addr); | |
21451 | + } else | |
21452 | + rc = -EINVAL; | |
21453 | + return rc; | |
21454 | +} | |
21455 | + | |
21456 | +/** | |
21457 | + * md_end_sync_request - End IO handler for synchronous I/O functions: records a failure in the control block found in bh->b_private, returns the bh to the pool and wakes the waiter once the last outstanding bh has completed. | |
21458 | + **/ | |
21459 | +static void md_end_sync_request(struct buffer_head *bh, int uptodate) | |
21460 | +{ | |
21461 | + struct evms_md_sync_cb * cb = (struct evms_md_sync_cb *) bh->b_private; | |
21462 | + | |
21463 | + if (!uptodate) | |
21464 | + cb->rc |= -EIO; /* any failed bh marks the whole request as failed */ | |
21465 | + /* we are done with the bh */ | |
21466 | + evms_cs_deallocate_to_pool(evms_bh_pool, bh); | |
21467 | + | |
21468 | + if (atomic_dec_and_test(&cb->io_count)) { /* true only for the completion that drops the count to zero */ | |
21469 | + if (waitqueue_active(&cb->wait)) | |
21470 | + wake_up(&cb->wait); | |
21471 | + } | |
21472 | +} | |
21473 | + | |
21474 | +/** | |
21475 | + * md_sync_request_submit_bh - initialise and submit a page-size bh; completion is reported through md_end_sync_request() | |
21476 | + * @node - target MD node | |
21477 | + * @bh - pointer to the buffer head | |
21478 | + * @sector - the sector number | |
21479 | + * @data - pointer to buffer; PAGE_SIZE bytes are transferred from/to it | |
21480 | + * @rw - READ/WRITE (any value other than READ is submitted as a write) | |
21481 | + * @cb - MD synchronous I/O control block; its io_count is incremented before submission | |
21482 | + **/ | |
21483 | +static inline void md_sync_request_submit_bh( | |
21484 | + struct evms_logical_node *node, | |
21485 | + struct buffer_head *bh, | |
21486 | + unsigned long sector, | |
21487 | + char *data, | |
21488 | + int rw, | |
21489 | + struct evms_md_sync_cb *cb) | |
21490 | +{ | |
21491 | + | |
21492 | + bh->b_this_page = (struct buffer_head *)1; /* non-NULL dummy value, not a real pointer; purpose not visible here */ | |
21493 | + bh->b_rsector = sector; | |
21494 | + bh->b_size = PAGE_SIZE; | |
21495 | + bh->b_state = 0; | |
21496 | + set_bit(BH_Dirty, &bh->b_state); | |
21497 | + set_bit(BH_Lock, &bh->b_state); | |
21498 | + set_bit(BH_Req, &bh->b_state); | |
21499 | + set_bit(BH_Mapped, &bh->b_state); | |
21500 | + atomic_set(&bh->b_count, 1); | |
21501 | + bh->b_data = data; | |
21502 | + bh->b_page = virt_to_page(data); /* assumes data is a directly mapped kernel address - TODO confirm for all callers */ | |
21503 | + bh->b_list = BUF_LOCKED; | |
21504 | + bh->b_end_io = md_end_sync_request; | |
21505 | + bh->b_private = cb; | |
21506 | + atomic_inc(&cb->io_count); /* counted before submission so the completion handler can balance it */ | |
21507 | + if (rw == READ) | |
21508 | + R_IO(node,bh); | |
21509 | + else | |
21510 | + W_IO(node,bh); | |
21511 | +} | |
21512 | + | |
21513 | +/** | |
21514 | + * evms_md_allocate_bh - take a buffer head from evms_bh_pool and initialise its wait queue and reference count | |
21515 | + * | |
21516 | + * Note that this function will not return unless we got a free bh: it keeps calling schedule() and retrying until the pool hands one out. | |
21517 | + **/ | |
21518 | +static inline struct buffer_head *evms_md_allocate_bh(void) | |
21519 | +{ | |
21520 | + struct buffer_head *bh; | |
21521 | + | |
21522 | + while ((bh = evms_cs_allocate_from_pool(evms_bh_pool, FALSE)) == NULL) | |
21523 | + schedule(); /* just yield until someone deallocates a bh */ | |
21524 | + init_waitqueue_head(&bh->b_wait); | |
21525 | + bh->b_count = (atomic_t)ATOMIC_INIT(0); | |
21526 | + return(bh); | |
21527 | +} | |
21528 | + | |
21529 | +/** | |
21530 | + * evms_md_partial_sync_io - | |
21531 | + * This function handles synchronous I/O within a single page (read-modify-write for writes): used when the sector is not page aligned or less than a page is requested | |
21532 | + * @node - evms node for the MD array | |
21533 | + * @rw - READ/WRITE | |
21534 | + * @sector - the sector | |
21535 | + * @nsects - on input, the total sectors for the request; | |
21536 | + * on output, number of sectors completed (never past the end of the page containing @sector; 0 on error or when nothing had to be done) | |
21537 | + * @data - data buffer | |
21538 | + **/ | |
21539 | +int evms_md_partial_sync_io( | |
21540 | + struct evms_logical_node *node, | |
21541 | + int rw, | |
21542 | + u64 sector, | |
21543 | + u32 *nsects, | |
21544 | + void *data) | |
21545 | +{ | |
21546 | + int rc; | |
21547 | + u32 offset, size; | |
21548 | + struct buffer_head *bh; | |
21549 | + struct evms_md_sync_cb cb; | |
21550 | + char *page; | |
21551 | + | |
21552 | + size = 0; /* byte count is derived below, after clamping to the page, so the shift cannot overflow */ | |
21553 | + | |
21554 | + /* calculate byte offset of @sector within its page */ | |
21555 | + offset = (u32)((sector & (EVMS_MD_SECTS_PER_PAGE-1)) << EVMS_VSECTOR_SIZE_SHIFT); | |
21556 | + if (!offset && (*nsects >= EVMS_MD_SECTS_PER_PAGE)) { | |
21557 | + *nsects = 0; | |
21558 | + return 0; /* Nothing to do: aligned and at least one full page, the caller handles it */ | |
21559 | + } | |
21560 | + | |
21561 | + page = NULL; | |
21562 | + rc = 0; | |
21563 | + | |
21564 | + page = kmalloc(PAGE_SIZE, GFP_KERNEL); | |
21565 | + if (!page) { | |
21566 | + LOG_ERROR("%s: no memory!\n", __FUNCTION__); | |
21567 | + rc = -ENOMEM; | |
21568 | + } | |
21569 | + | |
21570 | + /* Take a bh only when it will be submitted: one taken before a failed kmalloc was never returned to the pool. */ | |
21571 | + if (!rc) { | |
21572 | + bh = evms_md_allocate_bh(); | |
21573 | + memset(&cb, 0, sizeof(cb)); | |
21574 | + init_waitqueue_head(&cb.wait); | |
21575 | + cb.io_count = (atomic_t)ATOMIC_INIT(0); | |
21576 | + md_sync_request_submit_bh( | |
21577 | + node, bh, | |
21578 | + (unsigned long)(sector & EVMS_MD_SECTS_PER_PAGE_MASK), | |
21579 | + page, READ, &cb); | |
21580 | + wait_disk_event(cb.wait, !atomic_read(&cb.io_count)); | |
21581 | + rc |= cb.rc; | |
21582 | + } | |
21583 | + | |
21584 | + if (!rc) { | |
21585 | + size = PAGE_SIZE - offset; /* room left in this page */ | |
21586 | + if (*nsects < (size >> EVMS_VSECTOR_SIZE_SHIFT)) size = *nsects << EVMS_VSECTOR_SIZE_SHIFT; /* shorter request: safe to shift, it fits in a page */ | |
21587 | + switch (rw) { | |
21588 | + case READ: | |
21589 | + /* copy data and return */ | |
21590 | + memcpy(data, page+offset, size); | |
21591 | + break; | |
21592 | + case WRITE: | |
21593 | + /* copy data into the page just read, then write the whole page back */ | |
21594 | + memcpy(page+offset, data, size); | |
21595 | + | |
21596 | + bh = evms_md_allocate_bh(); | |
21597 | + | |
21598 | + md_sync_request_submit_bh( | |
21599 | + node, bh, | |
21600 | + (unsigned long)(sector & EVMS_MD_SECTS_PER_PAGE_MASK), | |
21601 | + page, WRITE, &cb); | |
21602 | + wait_disk_event(cb.wait, !atomic_read(&cb.io_count)); | |
21603 | + rc |= cb.rc; | |
21604 | + break; | |
21605 | + default: | |
21606 | + rc = -EINVAL; | |
21607 | + } | |
21608 | + } | |
21609 | + | |
21610 | + if (page) | |
21611 | + kfree(page); | |
21612 | + | |
21613 | + if (!rc) | |
21614 | + *nsects = (u32)(size >> EVMS_VSECTOR_SIZE_SHIFT); | |
21615 | + else | |
21616 | + *nsects = 0; | |
21617 | + return rc; | |
21618 | +} | |
21619 | + | |
21620 | +/** | |
21621 | + * evms_md_sync_io - This function handles synchronous I/O: an unaligned head and a sub-page tail go through evms_md_partial_sync_io(), the page-aligned middle is submitted one page-size bh at a time and then waited for. Returns 0 or a negative errno. | |
21622 | + **/ | |
21623 | +int evms_md_sync_io( | |
21624 | + struct evms_logical_node *node, | |
21625 | + int rw, | |
21626 | + u64 sector, | |
21627 | + u64 total_nr_sects, | |
21628 | + void *data ) | |
21629 | +{ | |
21630 | + int rc = 0; | |
21631 | + u64 total_nr_pages, size; | |
21632 | + u32 nsects; | |
21633 | + struct buffer_head *bh; | |
21634 | + struct evms_md_sync_cb cb; | |
21635 | + | |
21636 | + if (sector % EVMS_MD_SECTS_PER_PAGE) { /* unaligned head: bring sector up to the next page boundary first */ | |
21637 | + nsects = (total_nr_sects < EVMS_MD_SECTS_PER_PAGE) ? (u32)total_nr_sects : EVMS_MD_SECTS_PER_PAGE; /* clamp instead of truncating u64 -> u32; the helper never does more than one page anyway */ | |
21638 | + rc = evms_md_partial_sync_io(node, rw, sector, &nsects, data); | |
21639 | + if (!rc) { | |
21640 | + total_nr_sects -= nsects; | |
21641 | + sector += nsects; | |
21642 | + data += (nsects << EVMS_VSECTOR_SIZE_SHIFT); | |
21643 | + if (total_nr_sects == 0) | |
21644 | + return rc; | |
21645 | + } else { | |
21646 | + return rc; | |
21647 | + } | |
21648 | + } | |
21649 | + | |
21650 | + total_nr_pages = total_nr_sects / EVMS_MD_SECTS_PER_PAGE; | |
21651 | + size = total_nr_sects << EVMS_VSECTOR_SIZE_SHIFT; /* remaining bytes; what is left after the page loop is the tail */ | |
21652 | + | |
21653 | + memset(&cb, 0, sizeof(cb)); | |
21654 | + init_waitqueue_head(&cb.wait); | |
21655 | + cb.io_count = (atomic_t)ATOMIC_INIT(0); | |
21656 | + | |
21657 | + while (!rc && total_nr_pages) { /* rc is not modified inside the loop; errors are collected in cb.rc by the completion handler */ | |
21658 | + | |
21659 | + bh = evms_md_allocate_bh(); | |
21660 | + | |
21661 | + md_sync_request_submit_bh(node, bh,(unsigned long)sector, data, rw, &cb); | |
21662 | + | |
21663 | + sector += EVMS_MD_SECTS_PER_PAGE; | |
21664 | + size -= PAGE_SIZE; | |
21665 | + total_nr_pages--; | |
21666 | + data += PAGE_SIZE; | |
21667 | + } | |
21668 | + if (!rc) { | |
21669 | + wait_disk_event(cb.wait, !atomic_read(&cb.io_count)); /* wait until every submitted page has completed */ | |
21670 | + rc |= cb.rc; | |
21671 | + } | |
21672 | + | |
21673 | + if (!rc && size) { /* sub-page tail */ | |
21674 | + nsects = size >> EVMS_VSECTOR_SIZE_SHIFT; | |
21675 | + rc = evms_md_partial_sync_io(node, rw, sector, &nsects, data); | |
21676 | + } | |
21677 | + | |
21678 | + return(rc); | |
21679 | +} | |
21680 | + | |
21681 | +/* | |
21682 | + * Function: md_ioctl - ioctl entry point for an EVMS MD node. Answers HDIO_GETGEO itself, broadcasts the volume-wide EVMS commands to the child nodes, routes EVMS_PLUGIN_IOCTL to md_direct_ioctl() and hands every other command to the personality's evms_ioctl hook. Returns 0 or a negative errno: -EINVAL (no inode or no private data), -EFAULT, -ENODEV (no mddev), -ENOSYS (no personality or no hook), or the callee's result. | |
21683 | + */ | |
21684 | +static int md_ioctl( | |
21685 | + struct evms_logical_node * node, | |
21686 | + struct inode * inode, | |
21687 | + struct file * file, | |
21688 | + unsigned int cmd, | |
21689 | + unsigned long arg) | |
21690 | +{ | |
21691 | + struct evms_md * evms_md = node->private; | |
21692 | + mddev_t *mddev; | |
21693 | + int rc = 0; | |
21694 | + | |
21695 | + if ((!inode) || (!evms_md) ) | |
21696 | + rc = -EINVAL; | |
21697 | + | |
21698 | + if (!rc) { | |
21699 | + switch (cmd) { | |
21700 | + /* | |
21701 | + * We have a problem here : there is no easy way to give a CHS | |
21702 | + * virtual geometry. We currently pretend that we have a 2 heads | |
21703 | + * 4 sectors (with a BIG number of cylinders...). This drives | |
21704 | + * dosfs just mad... ;-) | |
21705 | + */ | |
21706 | + | |
21707 | + case HDIO_GETGEO: | |
21708 | + { | |
21709 | + struct hd_geometry hdgeo; | |
21710 | + hdgeo.heads = 2; | |
21711 | + hdgeo.sectors = 4; | |
21712 | + hdgeo.cylinders = ((unsigned int)node->total_vsectors) / /* total_vsectors is narrowed to unsigned int before the division */ | |
21713 | + hdgeo.heads / hdgeo.sectors; | |
21714 | + hdgeo.start = 0; | |
21715 | + if (copy_to_user((int *)arg, | |
21716 | + &hdgeo, | |
21717 | + sizeof(hdgeo))) | |
21718 | + rc = -EFAULT; | |
21719 | + } | |
21720 | + break; | |
21721 | + case EVMS_QUIESCE_VOLUME: | |
21722 | + case EVMS_GET_DISK_LIST: | |
21723 | + case EVMS_CHECK_MEDIA_CHANGE: | |
21724 | + case EVMS_REVALIDATE_DISK: | |
21725 | + case EVMS_OPEN_VOLUME: | |
21726 | + case EVMS_CLOSE_VOLUME: | |
21727 | + case EVMS_CHECK_DEVICE_STATUS: | |
21728 | + rc = md_ioctl_cmd_broadcast( | |
21729 | + node, inode, file, cmd, arg); | |
21730 | + break; | |
21731 | + case EVMS_PLUGIN_IOCTL: | |
21732 | + rc = md_direct_ioctl( | |
21733 | + inode, file, cmd, arg); | |
21734 | + break; | |
21735 | + default: | |
21736 | + mddev = evms_md->mddev; | |
21737 | + if (mddev == NULL) { | |
21738 | + rc = -ENODEV; | |
21739 | + } else if (mddev->pers == NULL || mddev->pers->evms_ioctl == NULL) { /* pers is tested first: an array without a personality must not be dereferenced */ | |
21740 | + rc = -ENOSYS; | |
21741 | + } else { | |
21742 | + rc = mddev->pers->evms_ioctl(mddev, inode, file, cmd, arg); | |
21743 | + } | |
21744 | + } | |
21745 | + } | |
21746 | + return(rc); | |
21747 | +} | |
21748 | +/* Forward cmd to every child node of the MD array except virtual spares and OR the children's return codes together. Returns that combined value, or -ENODEV when the node has no mddev attached. */ | |
21749 | +static int md_ioctl_cmd_broadcast( | |
21750 | + struct evms_logical_node *node, | |
21751 | + struct inode *inode, | |
21752 | + struct file *file, | |
21753 | + unsigned long cmd, | |
21754 | + unsigned long arg) | |
21755 | +{ | |
21756 | + int rc = 0; | |
21757 | + struct evms_md *evms_md; | |
21758 | + mddev_t *mddev; | |
21759 | + struct list_head *tmp; | |
21760 | + mdk_rdev_t *rdev; | |
21761 | + | |
21762 | + evms_md = node->private; | |
21763 | + mddev = evms_md->mddev; | |
21764 | + if (mddev == NULL) /* free_mddev() clears this pointer; iterating a NULL mddev would oops */ | |
21765 | + return -ENODEV; | |
21766 | + ITERATE_RDEV(mddev,rdev,tmp) { /* broadcast this cmd to all children */ | |
21767 | + if (!rdev->mddev) { | |
21768 | + MD_BUG(); | |
21769 | + continue; | |
21770 | + } | |
21771 | + if (!rdev->virtual_spare) { | |
21772 | + rc |= IOCTL(rdev->node, inode, file, cmd, arg); | |
21773 | + } | |
21774 | + } | |
21775 | + return (rc); | |
21776 | +} | |
21777 | + | |
21778 | +/* Add dev to mddev as a "virtual spare": an rdev flagged virtual_spare that takes a spare descriptor in the array superblock. Returns 0, -EEXIST (dev is already a member), -ENOMEM, -EBUSY (no free descriptor) or -EINVAL (reused slot has an unexpected number). */ | |
21779 | +static int evms_md_add_virtual_spare (mddev_t *mddev, kdev_t dev) | |
21780 | +{ | |
21781 | + mdk_rdev_t *rdev; | |
21782 | + mdp_disk_t *disk = NULL; | |
21783 | + int i; | |
21784 | + | |
21785 | + if (evms_md_find_rdev(mddev,dev)) | |
21786 | + return -EEXIST; | |
21787 | + | |
21788 | + LOG_ENTRY_EXIT("%s ENTRY\n", __FUNCTION__); | |
21789 | + if ((rdev = kmalloc(sizeof(*rdev),GFP_KERNEL)) == NULL) | |
21790 | + return -ENOMEM; | |
21791 | + | |
21792 | + memset(rdev, 0, sizeof(*rdev)); | |
21793 | + /* Search the descriptors after the raid_disks slots for one that is unused (major and minor both 0) or marked removed. */ | |
21794 | + for (i = mddev->sb->raid_disks; i < MD_SB_DISKS; i++) { | |
21795 | + disk = mddev->sb->disks + i; | |
21796 | + if (!disk->major && !disk->minor) | |
21797 | + break; | |
21798 | + if (disk_removed(disk)) | |
21799 | + break; | |
21800 | + } | |
21801 | + if (i == MD_SB_DISKS) { | |
21802 | + LOG_WARNING("%s : [md%d]can not hot-add to full array!\n", __FUNCTION__, mdidx(mddev)); | |
21803 | + kfree(rdev); | |
21804 | + return -EBUSY; | |
21805 | + } | |
21806 | + | |
21807 | + if (disk_removed(disk)) { | |
21808 | + /* | |
21809 | + * reuse slot | |
21810 | + */ | |
21811 | + if (disk->number != i) { | |
21812 | + MD_BUG(); | |
21813 | + kfree(rdev); | |
21814 | + return -EINVAL; | |
21815 | + } | |
21816 | + } else { | |
21817 | + disk->number = i; | |
21818 | + } | |
21819 | + | |
21820 | + disk->raid_disk = disk->number; | |
21821 | + disk->major = MAJOR(dev); | |
21822 | + disk->minor = MINOR(dev); | |
21823 | + | |
21824 | + mark_disk_spare(disk); | |
21825 | + /* Fill in the in-memory rdev; rdev->node stays NULL from the memset above. */ | |
21826 | + rdev->mddev = mddev; | |
21827 | + rdev->dev = dev; | |
21828 | + rdev->desc_nr = disk->number; | |
21829 | + rdev->virtual_spare = 1; | |
21830 | + | |
21831 | + /* bind rdev to mddev array */ | |
21832 | + list_add(&rdev->all, &all_raid_disks); | |
21833 | + list_add(&rdev->same_set, &mddev->disks); | |
21834 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
21835 | + /* Account for the new spare in the superblock counters and in the array's device count. */ | |
21836 | + mddev->sb->nr_disks++; | |
21837 | + mddev->sb->spare_disks++; | |
21838 | + mddev->sb->working_disks++; | |
21839 | + mddev->nb_dev++; | |
21840 | + | |
21841 | + mddev->sb_dirty = 1; | |
21842 | + | |
21843 | + evms_md_update_sb(mddev); | |
21844 | + | |
21845 | + return 0; | |
21846 | +} | |
21847 | +/* Remove dev from mddev. Returns 0 on success, -ENODEV when the superblock has no descriptor for dev, -ENOSYS when a non-faulty member must be removed but there is no personality diskop, or the diskop's own error. */ | |
21848 | +static int evms_md_remove_disk(mddev_t *mddev, kdev_t dev) | |
21849 | +{ | |
21850 | + mdk_rdev_t *rdev = NULL; | |
21851 | + mdp_disk_t *disk; | |
21852 | + int rc = 0; | |
21853 | + | |
21854 | + disk = evms_md_find_disk(mddev,dev); | |
21855 | + if (!disk) | |
21856 | + return -ENODEV; | |
21857 | + | |
21858 | + rdev = evms_md_find_rdev(mddev,dev); | |
21859 | + | |
21860 | + if (rdev && !rdev->faulty) { | |
21861 | + /* | |
21862 | + * The disk is active in the array, | |
21863 | + * must ask the personality to do it | |
21864 | + */ | |
21865 | + if (mddev->pers && mddev->pers->diskop) { | |
21866 | + /* Assume spare, try to remove it first. */ | |
21867 | + rc = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_SPARE); | |
21868 | + if (rc) | |
21869 | + rc = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK); | |
21870 | + } else | |
21871 | + rc = -ENOSYS; | |
21872 | + } | |
21873 | + /* A faulty member, or a descriptor with no rdev, skips the personality and is dropped directly. */ | |
21874 | + if (!rc) { | |
21875 | + remove_descriptor(disk,mddev->sb); | |
21876 | + if (rdev) | |
21877 | + kick_rdev_from_array(rdev); | |
21878 | + mddev->sb_dirty = 1; | |
21879 | + evms_md_update_sb(mddev); | |
21880 | + | |
21881 | + } | |
21882 | + return rc; | |
21883 | +} | |
21884 | + | |
21885 | + | |
21886 | +/* | |
21887 | + * Function: md_direct_ioctl | |
21888 | + * | |
21889 | + * This function provides a method for user-space to communicate directly with a plugin in the kernel. args points to a user-space struct evms_plugin_ioctl_pkt; its feature_ioctl_data points to a struct evms_md_ioctl that names the target md device (mddev_idx) and carries the sub-command arguments. | |
21890 | + * The result is stored in the packet's status field, copied back to user space and returned: 0 or a negative errno (-EFAULT on a failed user copy, -EINVAL for a foreign feature_id, -ENODEV for an unknown md index, -ENOSYS for an unsupported command). The module use count is held for the duration of the call. | |
21891 | + */ | |
21892 | +static int md_direct_ioctl( | |
21893 | + struct inode * inode, | |
21894 | + struct file * file, | |
21895 | + unsigned int cmd, | |
21896 | + unsigned long args ) | |
21897 | +{ | |
21898 | + struct evms_plugin_ioctl_pkt argument; | |
21899 | + kdev_t md_kdev; | |
21900 | + mddev_t *mddev = NULL; | |
21901 | + struct evms_md_ioctl ioctl_arg; | |
21902 | + struct evms_md_kdev device; | |
21903 | + struct evms_md_array_info array_info, *usr_array_info; | |
21904 | + int rc = 0; | |
21905 | + | |
21906 | + MOD_INC_USE_COUNT; | |
21907 | + | |
21908 | + // Copy user's parameters to kernel space | |
21909 | + if ( copy_from_user(&argument, (struct evms_plugin_ioctl_pkt*)args, sizeof(argument)) ) { | |
21910 | + MOD_DEC_USE_COUNT; | |
21911 | + return -EFAULT; | |
21912 | + } | |
21913 | + | |
21914 | + // Make sure this is supposed to be our ioctl. | |
21915 | + if ( argument.feature_id != md_plugin_header.id ) { | |
21916 | + MOD_DEC_USE_COUNT; | |
21917 | + return -EINVAL; | |
21918 | + } | |
21919 | + | |
21920 | + // Copy user's md ioctl parameters to kernel space | |
21921 | + if ( copy_from_user(&ioctl_arg, | |
21922 | + (struct evms_md_ioctl*)argument.feature_ioctl_data, | |
21923 | + sizeof(ioctl_arg)) ) | |
21924 | + rc = -EFAULT; | |
21925 | + else { | |
21926 | + if (ioctl_arg.mddev_idx < MAX_MD_DEVS) { | |
21927 | + md_kdev = MKDEV(MD_MAJOR, ioctl_arg.mddev_idx); | |
21928 | + mddev = kdev_to_mddev(md_kdev); | |
21929 | + if (mddev == NULL) | |
21930 | + rc = -ENODEV; | |
21931 | + } else | |
21932 | + rc = -ENODEV; | |
21933 | + } | |
21934 | + | |
21935 | + if (!rc) { | |
21936 | + switch(argument.feature_command) { | |
21937 | + case EVMS_MD_PERS_IOCTL_CMD: | |
21938 | + if (mddev->pers == NULL || mddev->pers->md_pers_ioctl == NULL) { /* an array without a personality has no hook to call */ | |
21939 | + MOD_DEC_USE_COUNT; | |
21940 | + return -ENOSYS; | |
21941 | + } | |
21942 | + rc = mddev->pers->md_pers_ioctl(mddev, | |
21943 | + ioctl_arg.cmd, | |
21944 | + ioctl_arg.arg); | |
21945 | + if (copy_to_user((struct evms_md_ioctl*)argument.feature_ioctl_data, | |
21946 | + &ioctl_arg, sizeof(ioctl_arg)) && !rc) /* keep an earlier error, otherwise report the failed write-back */ | |
21947 | + rc = -EFAULT; | |
21948 | + break; | |
21949 | + | |
21950 | + case EVMS_MD_ADD: | |
21951 | + if ( copy_from_user(&device, | |
21952 | + (struct evms_md_kdev *)ioctl_arg.arg, | |
21953 | + sizeof(device)) ) | |
21954 | + rc = -EFAULT; | |
21955 | + else | |
21956 | + rc = evms_md_add_virtual_spare(mddev,MKDEV(device.major, device.minor)); | |
21957 | + break; | |
21958 | + | |
21959 | + case EVMS_MD_REMOVE: | |
21960 | + if ( copy_from_user(&device, | |
21961 | + (struct evms_md_kdev *)ioctl_arg.arg, | |
21962 | + sizeof(device)) ) | |
21963 | + rc = -EFAULT; | |
21964 | + else | |
21965 | + rc = evms_md_remove_disk(mddev,MKDEV(device.major, device.minor)); | |
21966 | + break; | |
21967 | + | |
21968 | + case EVMS_MD_ACTIVATE: | |
21969 | + rc = -ENOSYS; | |
21970 | + break; | |
21971 | + | |
21972 | + case EVMS_MD_DEACTIVATE: | |
21973 | + rc = -ENOSYS; | |
21974 | + break; | |
21975 | + | |
21976 | + case EVMS_MD_GET_ARRAY_INFO: | |
21977 | + /* array_info is read from user space first because it carries the user's sb destination pointer. */ | |
21978 | + usr_array_info = (struct evms_md_array_info *)ioctl_arg.arg; | |
21979 | + if ( copy_from_user(&array_info, usr_array_info, | |
21980 | + sizeof(array_info)) ) | |
21981 | + rc = -EFAULT; | |
21982 | + else { | |
21983 | + array_info.state = 0; | |
21984 | + if (mddev->curr_resync) | |
21985 | + array_info.state |= EVMS_MD_ARRAY_SYNCING; | |
21986 | + if (copy_to_user(&usr_array_info->state, &array_info.state, | |
21987 | + sizeof(usr_array_info->state)) || | |
21988 | + copy_to_user(array_info.sb, mddev->sb, | |
21989 | + sizeof(mdp_super_t))) | |
21990 | + rc = -EFAULT; | |
21991 | + } | |
21992 | + break; | |
21993 | + default: | |
21994 | + rc = -ENOSYS; | |
21995 | + break; | |
21996 | + } | |
21997 | + } | |
21998 | + argument.status = rc; | |
21999 | + if (copy_to_user((struct evms_plugin_ioctl_pkt*)args, &argument, sizeof(argument)) && !rc) /* the status packet could not be delivered */ | |
22000 | + rc = -EFAULT; | |
22001 | + MOD_DEC_USE_COUNT; | |
22002 | + return rc; | |
22003 | +} | |
22004 | + | |
22005 | + | |
22006 | + | |
22007 | +/* Record mddev and its data pointer in evms_mddev_map at the slot indexed by dev's minor number. Flags MD_BUG and changes nothing when dev is not on MD_MAJOR or the slot is already occupied. */ | |
22008 | +void evms_md_add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data) | |
22009 | +{ | |
22010 | + unsigned int minor = MINOR(dev); | |
22011 | + | |
22012 | + if (MAJOR(dev) != MD_MAJOR) { | |
22013 | + MD_BUG(); | |
22014 | + return; | |
22015 | + } | |
22016 | + if (evms_mddev_map[minor].mddev != NULL) { | |
22017 | + MD_BUG(); | |
22018 | + return; | |
22019 | + } | |
22020 | + evms_mddev_map[minor].mddev = mddev; | |
22021 | + evms_mddev_map[minor].data = data; | |
22022 | +} | |
22023 | +/* Clear the evms_mddev_map slot indexed by dev's minor number. Flags MD_BUG and changes nothing when dev is not on MD_MAJOR or the slot does not currently hold mddev. */ | |
22024 | +void evms_md_del_mddev_mapping (mddev_t * mddev, kdev_t dev) | |
22025 | +{ | |
22026 | + unsigned int minor = MINOR(dev); | |
22027 | + | |
22028 | + if (MAJOR(dev) != MD_MAJOR) { | |
22029 | + MD_BUG(); | |
22030 | + return; | |
22031 | + } | |
22032 | + if (evms_mddev_map[minor].mddev != mddev) { | |
22033 | + MD_BUG(); | |
22034 | + return; | |
22035 | + } | |
22036 | + evms_mddev_map[minor].mddev = NULL; | |
22037 | + evms_mddev_map[minor].data = NULL; | |
22038 | +} | |
22039 | +/* Allocate and zero a new mddev for dev, initialise its semaphores, lists and counters, register it in the minor map and on all_mddevs, take a module reference and ask for the recovery thread. Returns the new mddev, or NULL when dev is not on MD_MAJOR or the allocation fails. */ | |
22040 | +static mddev_t * alloc_mddev (kdev_t dev) | |
22041 | +{ | |
22042 | + mddev_t *mddev; | |
22043 | + | |
22044 | + if (MAJOR(dev) != MD_MAJOR) { | |
22045 | + MD_BUG(); | |
22046 | + return NULL; | |
22047 | + } | |
22048 | + mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); | |
22049 | + if (!mddev) | |
22050 | + return NULL; | |
22051 | + | |
22052 | + memset(mddev, 0, sizeof(*mddev)); | |
22053 | + | |
22054 | + mddev->__minor = MINOR(dev); | |
22055 | + init_MUTEX(&mddev->reconfig_sem); | |
22056 | + init_MUTEX(&mddev->recovery_sem); | |
22057 | + init_MUTEX(&mddev->resync_sem); | |
22058 | + INIT_LIST_HEAD(&mddev->disks); | |
22059 | + INIT_LIST_HEAD(&mddev->all_mddevs); | |
22060 | + INIT_LIST_HEAD(&mddev->incomplete_mddevs); | |
22061 | + INIT_LIST_HEAD(&mddev->running_mddevs); | |
22062 | + mddev->active = (atomic_t)ATOMIC_INIT(0); | |
22063 | + mddev->recovery_active = (atomic_t)ATOMIC_INIT(0); | |
22064 | + | |
22065 | + /* | |
22066 | + * The 'base' mddev is the one with data NULL. | |
22067 | + * personalities can create additional mddevs | |
22068 | + * if necessary. | |
22069 | + */ | |
22070 | + evms_md_add_mddev_mapping(mddev, dev, 0); | |
22071 | + list_add(&mddev->all_mddevs, &all_mddevs); | |
22072 | + | |
22073 | + MOD_INC_USE_COUNT; /* paired with the MOD_DEC_USE_COUNT in free_mddev() */ | |
22074 | + evms_md_create_recovery_thread(); | |
22075 | + | |
22076 | + return mddev; | |
22077 | +} | |
22078 | +/* Return the member of mddev whose descriptor number equals nr, or NULL when no member carries that number. */ | |
22079 | +mdk_rdev_t * evms_md_find_rdev_nr(mddev_t *mddev, int nr) | |
22080 | +{ | |
22081 | + struct list_head *pos; | |
22082 | + mdk_rdev_t *member; | |
22083 | + | |
22084 | + ITERATE_RDEV(mddev,member,pos) { | |
22085 | + if (nr == member->desc_nr) | |
22086 | + return member; | |
22087 | + } | |
22088 | + return NULL; | |
22089 | +} | |
22090 | + | |
22091 | +/* Return the member of mddev whose device number equals dev, or NULL when dev is not a member. */ | |
22092 | +mdk_rdev_t * evms_md_find_rdev(mddev_t * mddev, kdev_t dev) | |
22093 | +{ | |
22094 | + mdk_rdev_t *member; | |
22095 | + struct list_head *pos; | |
22096 | + | |
22097 | + ITERATE_RDEV(mddev,member,pos) { | |
22098 | + if (dev == member->dev) | |
22099 | + return member; | |
22100 | + } | |
22101 | + return NULL; | |
22102 | +} | |
22103 | +/* Return the member of mddev that is backed by the given EVMS logical node, or NULL when no member uses it. */ | |
22104 | +mdk_rdev_t * evms_md_find_rdev_from_node(mddev_t * mddev, struct evms_logical_node * node) | |
22105 | +{ | |
22106 | + mdk_rdev_t *member; | |
22107 | + struct list_head *pos; | |
22108 | + | |
22109 | + ITERATE_RDEV(mddev,member,pos) { | |
22110 | + if (node == member->node) | |
22111 | + return member; | |
22112 | + } | |
22113 | + return NULL; | |
22114 | +} | |
22115 | +/* List of dev_name_t entries that caches the names built by org_partition_name(). */ | |
22116 | +static MD_LIST_HEAD(device_names); | |
22117 | +/* Return a printable name for dev. A cached name is reused when dev is already on device_names; otherwise a new entry is allocated, named and added. Returns the static string "<nomem>" when the allocation fails. */ | |
22118 | +static char * org_partition_name (kdev_t dev) | |
22119 | +{ | |
22120 | + struct gendisk *hd; | |
22121 | + static char nomem [] = "<nomem>"; | |
22122 | + dev_name_t *dname; | |
22123 | + struct list_head *tmp = device_names.next; | |
22124 | + | |
22125 | + while (tmp != &device_names) { | |
22126 | + dname = list_entry(tmp, dev_name_t, list); | |
22127 | + if (dname->dev == dev) | |
22128 | + return dname->name; | |
22129 | + tmp = tmp->next; | |
22130 | + } | |
22131 | + | |
22132 | + dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); | |
22133 | + | |
22134 | + if (!dname) | |
22135 | + return nomem; | |
22136 | + /* | |
22137 | + * ok, add this new device name to the list | |
22138 | + */ | |
22139 | + hd = get_gendisk (dev); | |
22140 | + dname->name = NULL; | |
22141 | + if (hd) | |
22142 | + dname->name = disk_name (hd, MINOR(dev), dname->namebuf); | |
22143 | + if (!dname->name) { /* no gendisk, or disk_name() gave nothing: fall back to "[dev ...]" */ | |
22144 | + sprintf (dname->namebuf, "[dev %s]", kdevname(dev)); | |
22145 | + dname->name = dname->namebuf; | |
22146 | + } | |
22147 | + | |
22148 | + dname->dev = dev; | |
22149 | + MD_INIT_LIST_HEAD(&dname->list); | |
22150 | + list_add(&dname->list, &device_names); /* nothing in this function ever removes or frees an entry */ | |
22151 | + | |
22152 | + return dname->name; | |
22153 | +} | |
22154 | + | |
22155 | +/* Return the name of an EVMS node, or the placeholder below when the node or its name is missing. */ | |
22156 | +#define EVMS_MD_NULL_PARTITION_NAME "<EVMS_NODE_NO_NAME>" | |
22157 | +char * evms_md_partition_name (struct evms_logical_node *node) | |
22158 | +{ | |
22159 | + if (!node || !node->name) | |
22160 | + return EVMS_MD_NULL_PARTITION_NAME; | |
22161 | + | |
22162 | + return node->name; | |
22163 | +} | |
22164 | +/* Return a printable name for a member device. */ | |
22165 | +static char * get_partition_name (mdk_rdev_t *rdev) | |
22166 | +{ | |
22167 | + /* Members backed by an EVMS node use the node name; the others fall back to the kdev-based name. */ | |
22168 | + return rdev->node | |
22169 | + ? evms_md_partition_name(rdev->node) | |
22170 | + : org_partition_name(rdev->dev); | |
22171 | +} | |
22172 | + | |
22173 | +/* | |
22174 | + * Function: evms_md_calc_dev_sboffset | |
22175 | + * Return the superblock location of a node as a sector number: MD_NEW_SIZE_SECTORS() applied to the node's total_vsectors when persistent is set, total_vsectors itself otherwise. mddev is not used. | |
22176 | + */ | |
22177 | +static u64 evms_md_calc_dev_sboffset (struct evms_logical_node *node,mddev_t *mddev, int persistent) | |
22178 | +{ | |
22179 | + u64 nr_sectors = node->total_vsectors; | |
22180 | + | |
22181 | + /* Only a persistent superblock moves the offset away from the end of the node. */ | |
22182 | + if (persistent) | |
22183 | + nr_sectors = MD_NEW_SIZE_SECTORS(nr_sectors); | |
22184 | + | |
22185 | + return nr_sectors; /* size in sectors */ | |
22186 | +} | |
22187 | + | |
22188 | +/* | |
22189 | + * Function: evms_md_calc_dev_size | |
22190 | + * Return the data size (in blocks) of an "extended" device: half the sector count from evms_md_calc_dev_sboffset(), with the low bits below chunk_size/1024 cleared when the array superblock defines a chunk size. | |
22191 | + */ | |
22192 | +static unsigned long evms_md_calc_dev_size (struct evms_logical_node *node, | |
22193 | + mddev_t *mddev, | |
22194 | + int persistent) | |
22195 | +{ | |
22196 | + u64 sectors; | |
22197 | + unsigned long blocks; | |
22198 | + | |
22199 | + sectors = evms_md_calc_dev_sboffset(node, mddev, persistent); | |
22200 | + blocks = sectors >> 1; | |
22201 | + if (mddev->sb == NULL) { | |
22202 | + MD_BUG(); | |
22203 | + return blocks; | |
22204 | + } | |
22205 | + if (mddev->sb->chunk_size != 0) | |
22206 | + blocks &= ~(mddev->sb->chunk_size/1024 - 1); | |
22207 | + return blocks; | |
22208 | +} | |
22209 | +/* Clear the low bits below chunk_size/1024 in every member's size and add the results to evms_md_size[] for this array. Returns 0. NOTE(review): the return type is unsigned, so the -EINVAL on the missing-superblock path reaches the caller as a large positive value - confirm how callers test it. */ | |
22210 | +static unsigned int zoned_raid_size (mddev_t *mddev) | |
22211 | +{ | |
22212 | + unsigned int mask; | |
22213 | + mdk_rdev_t * rdev; | |
22214 | + struct list_head *tmp; | |
22215 | + | |
22216 | + if (!mddev->sb) { | |
22217 | + MD_BUG(); | |
22218 | + return -EINVAL; | |
22219 | + } | |
22220 | + /* | |
22221 | + * do size and offset calculations. | |
22222 | + */ | |
22223 | + mask = ~(mddev->sb->chunk_size/1024 - 1); | |
22224 | + | |
22225 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22226 | + rdev->size &= mask; | |
22227 | + evms_md_size[mdidx(mddev)] += rdev->size; /* accumulates; the entry is not reset here */ | |
22228 | + } | |
22229 | + return 0; | |
22230 | +} | |
22231 | + | |
22232 | +/* | |
22233 | + * We check whether all devices are numbered from 0 to nb_dev-1. The | |
22234 | + * order is guaranteed even after device name changes. | |
22235 | + * Returns 0 when the ordering is valid and 1 otherwise. | |
22236 | + * Some personalities (raid0, linear) use this. Personalities that | |
22237 | + * provide data have to be able to deal with loss of individual | |
22238 | + * disks, so they do their checking themselves. | |
22239 | + */ | |
22240 | +int evms_md_check_ordering (mddev_t *mddev) | |
22241 | +{ | |
22242 | + int i, c; | |
22243 | + mdk_rdev_t *rdev; | |
22244 | + struct list_head *tmp; | |
22245 | + | |
22246 | + /* | |
22247 | + * First, all devices must be fully functional | |
22248 | + */ | |
22249 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22250 | + if (rdev->faulty) { | |
22251 | + LOG_ERROR("evms_md_check_ordering() md%d's device %s faulty, aborting.\n", | |
22252 | + mdidx(mddev), get_partition_name(rdev)); | |
22253 | + goto abort; | |
22254 | + } | |
22255 | + } | |
22256 | + /* The member list length must match nb_dev, and nb_dev must match the superblock's raid_disks. */ | |
22257 | + c = 0; | |
22258 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22259 | + c++; | |
22260 | + } | |
22261 | + if (c != mddev->nb_dev) { | |
22262 | + MD_BUG(); | |
22263 | + goto abort; | |
22264 | + } | |
22265 | + if (mddev->nb_dev != mddev->sb->raid_disks) { | |
22266 | + LOG_ERROR("%s: [md%d] array needs %d disks, has %d, aborting.\n", | |
22267 | + __FUNCTION__, mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev); | |
22268 | + goto abort; | |
22269 | + } | |
22270 | + /* | |
22271 | + * Now the numbering check | |
22272 | + */ | |
22273 | + for (i = 0; i < mddev->nb_dev; i++) { | |
22274 | + c = 0; /* number of members whose desc_nr equals i; must end up exactly 1 */ | |
22275 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22276 | + if (rdev->desc_nr == i) | |
22277 | + c++; | |
22278 | + } | |
22279 | + if (!c) { | |
22280 | + LOG_ERROR("md%d, missing disk #%d, aborting.\n",mdidx(mddev), i); | |
22281 | + goto abort; | |
22282 | + } | |
22283 | + if (c > 1) { | |
22284 | + LOG_ERROR("md%d, too many disks #%d, aborting.\n",mdidx(mddev), i); | |
22285 | + goto abort; | |
22286 | + } | |
22287 | + } | |
22288 | + return 0; | |
22289 | +abort: | |
22290 | + return 1; | |
22291 | +} | |
22292 | +/* Take a disk descriptor out of the superblock accounting: decrement the counters that match its current state, zero its major/minor and mark it removed. */ | |
22293 | +static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb) | |
22294 | +{ | |
22295 | + if (disk_active(disk)) { | |
22296 | + sb->working_disks--; | |
22297 | + } else if (disk_spare(disk)) { | |
22298 | + sb->spare_disks--; | |
22299 | + sb->working_disks--; | |
22300 | + } else { | |
22301 | + sb->failed_disks--; | |
22302 | + } | |
22303 | + | |
22304 | + sb->nr_disks--; | |
22305 | + disk->minor = 0; | |
22306 | + disk->major = 0; | |
22307 | + mark_disk_removed(disk); | |
22308 | +} | |
22309 | +/* Format strings for superblock diagnostics. BAD_MINOR takes a device name and a minor number; it and BAD_CSUM are used by check_disk_sb(). */ | |
22310 | +#define BAD_MINOR \ | |
22311 | +"%s: invalid raid minor (%x)\n" | |
22312 | +/* NO_SB takes the name of the device whose superblock could not be read. */ | |
22313 | +#define NO_SB \ | |
22314 | +"disabled device %s, could not read superblock.\n" | |
22315 | +/* BAD_CSUM takes the name of the device whose superblock checksum did not match. */ | |
22316 | +#define BAD_CSUM \ | |
22317 | +"invalid superblock checksum on %s\n" | |
22318 | + | |
22319 | +/* Allocate one cleared page for the array superblock mddev->sb. Returns 0 on success, 0 after MD_BUG when a superblock is already attached (it is left untouched), or -ENOMEM when no page is available. */ | |
22320 | +static int alloc_array_sb (mddev_t * mddev) | |
22321 | +{ | |
22322 | + if (mddev->sb) { | |
22323 | + MD_BUG(); | |
22324 | + return 0; | |
22325 | + } | |
22326 | + | |
22327 | + mddev->sb = (mdp_super_t *) __get_free_page (GFP_KERNEL); | |
22328 | + if (!mddev->sb) { | |
22329 | + LOG_ERROR("%s: Out of memory!\n", __FUNCTION__); | |
22330 | + return -ENOMEM; | |
22331 | + } | |
22332 | + md_clear_page(mddev->sb); | |
22333 | + return 0; | |
22334 | +} | |
22335 | +/* Allocate one cleared page for the per-device superblock rdev->sb. Returns 0 on success or -ENOMEM when no page is available, the same code alloc_array_sb() uses. An already attached superblock is flagged with MD_BUG and then overwritten. */ | |
22336 | +static int alloc_disk_sb (mdk_rdev_t * rdev) | |
22337 | +{ | |
22338 | + if (rdev->sb) | |
22339 | + MD_BUG(); | |
22340 | + | |
22341 | + rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); | |
22342 | + if (!rdev->sb) { | |
22343 | + LOG_ERROR("%s: Out of memory!\n", __FUNCTION__); | |
22344 | + return -ENOMEM; | |
22345 | + } | |
22346 | + md_clear_page(rdev->sb); | |
22347 | + | |
22348 | + return 0; | |
22349 | +} | |
22350 | + | |
22351 | +/* | |
22352 | + * Function: free_disk_sb | |
22353 | + * Release the superblock page of rdev and reset its sb, sb_offset and size fields. A missing superblock is flagged with MD_BUG unless the device is a virtual spare or faulty. | |
22354 | + */ | |
22355 | +static void free_disk_sb (mdk_rdev_t * rdev) | |
22356 | +{ | |
22357 | + if (rdev->sb) { | |
22358 | + free_page((unsigned long) rdev->sb); | |
22359 | + rdev->sb = NULL; | |
22360 | + rdev->sb_offset = 0; | |
22361 | + rdev->size = 0; | |
22362 | + } else { | |
22363 | + if (!rdev->virtual_spare && !rdev->faulty) | |
22364 | + MD_BUG(); | |
22365 | + } | |
22366 | +} | |
22367 | + | |
22368 | +/* | |
22369 | + * Function: evms_md_read_disk_sb | |
22370 | + * Read the MD superblock of rdev's node into the preallocated rdev->sb and record its offset in rdev->sb_offset. Returns -EINVAL when rdev->sb is missing or the node has no more than MD_RESERVED_SECTORS sectors, otherwise the result of INIT_IO(). NOTE(review): node is dereferenced unchecked, yet virtual spares are created without a node - confirm callers never pass one. | |
22371 | + */ | |
22372 | +static int evms_md_read_disk_sb (mdk_rdev_t * rdev) | |
22373 | +{ | |
22374 | + int rc = 0; | |
22375 | + struct evms_logical_node *node = rdev->node; | |
22376 | + u64 sb_offset_in_sectors; | |
22377 | + | |
22378 | + if (!rdev->sb) { | |
22379 | + MD_BUG(); | |
22380 | + return -EINVAL; | |
22381 | + } | |
22382 | + if (node->total_vsectors <= MD_RESERVED_SECTORS) { | |
22383 | + LOG_DETAILS("%s is too small, total_vsectors("PFU64")\n", | |
22384 | + evms_md_partition_name(node), node->total_vsectors); | |
22385 | + return -EINVAL; | |
22386 | + } | |
22387 | + | |
22388 | + /* | |
22389 | + * Calculate the position of the superblock, | |
22390 | + * it's at the end of the disk | |
22391 | + */ | |
22392 | + sb_offset_in_sectors = evms_md_calc_dev_sboffset(node, rdev->mddev, 1); | |
22393 | + rdev->sb_offset = (unsigned long)(sb_offset_in_sectors >> 1); /* stored as half the sector offset */ | |
22394 | + LOG_DEBUG("(read) %s's sb offset("PFU64") total_vsectors("PFU64")\n", | |
22395 | + evms_md_partition_name(node), sb_offset_in_sectors, node->total_vsectors); | |
22396 | + | |
22397 | + /* | |
22398 | + * Read superblock | |
22399 | + */ | |
22400 | + rc = INIT_IO(node, 0, sb_offset_in_sectors, MD_SB_SECTORS, rdev->sb); | |
22401 | + | |
22402 | + return rc; | |
22403 | +} | |
22404 | +/* Compute the checksum of a superblock over MD_SB_BYTES with its sb_csum field treated as zero. The stored sb_csum is put back before returning. */ | |
22405 | +static unsigned int calc_sb_csum (mdp_super_t * sb) | |
22406 | +{ | |
22407 | + unsigned int saved = sb->sb_csum; | |
22408 | + unsigned int computed; | |
22409 | + | |
22410 | + sb->sb_csum = 0; | |
22411 | + computed = csum_partial((void *)sb, MD_SB_BYTES, 0); | |
22412 | + sb->sb_csum = saved; | |
22413 | + return computed; | |
22414 | +} | |
22415 | + | |
22416 | + | |
22417 | + | |
22418 | +/* | |
22419 | + * Check one RAID superblock for generic plausibility | |
22420 | + */ | |
22421 | +/* Returns 0 when rdev->sb has the MD magic, a minor below MAX_MD_DEVS, a matching checksum and a level of -1, 0, 1 or 5; returns -EINVAL otherwise. A wrong magic is rejected without a log message. */ | |
22422 | +static int check_disk_sb (mdk_rdev_t * rdev) | |
22423 | +{ | |
22424 | + mdp_super_t *sb; | |
22425 | + int ret = -EINVAL; | |
22426 | + | |
22427 | + sb = rdev->sb; | |
22428 | + if (!sb) { | |
22429 | + MD_BUG(); | |
22430 | + goto abort; | |
22431 | + } | |
22432 | + | |
22433 | + if (sb->md_magic != MD_SB_MAGIC) { | |
22434 | + goto abort; | |
22435 | + } | |
22436 | + | |
22437 | + if (sb->md_minor >= MAX_MD_DEVS) { | |
22438 | + LOG_ERROR(BAD_MINOR, get_partition_name(rdev), sb->md_minor); | |
22439 | + goto abort; | |
22440 | + } | |
22441 | + if (calc_sb_csum(sb) != sb->sb_csum) { | |
22442 | + LOG_ERROR(BAD_CSUM, get_partition_name(rdev)); | |
22443 | + goto abort; | |
22444 | + } | |
22445 | + /* Only these levels are accepted; every other level is logged as unsupported. */ | |
22446 | + switch (sb->level) { | |
22447 | + case -1: | |
22448 | + case 0: | |
22449 | + case 1: | |
22450 | + case 5: | |
22451 | + break; | |
22452 | + default: | |
22453 | + LOG_ERROR("%s: EVMS MD does not support MD level %d\n", __FUNCTION__, sb->level); | |
22454 | + goto abort; | |
22455 | + } | |
22456 | + ret = 0; | |
22457 | +abort: | |
22458 | + return ret; | |
22459 | +} | |
22460 | +/* Return dev with the low minor_shift bits of its minor number cleared, or 0 when dev has no gendisk. */ | |
22461 | +static kdev_t dev_unit(kdev_t dev) | |
22462 | +{ | |
22463 | + struct gendisk *disk = get_gendisk(dev); | |
22464 | + unsigned int unit_mask; | |
22465 | + | |
22466 | + if (disk == NULL) | |
22467 | + return 0; | |
22468 | + | |
22469 | + unit_mask = ~((1 << disk->minor_shift) - 1); | |
22470 | + return MKDEV(MAJOR(dev), MINOR(dev) & unit_mask); | |
22471 | +} | |
22472 | +/* Return the first member of mddev whose dev_unit() equals that of dev, or NULL when there is none. */ | |
22473 | +static mdk_rdev_t * match_dev_unit(mddev_t *mddev, kdev_t dev) | |
22474 | +{ | |
22475 | + mdk_rdev_t *member; | |
22476 | + struct list_head *pos; | |
22477 | + | |
22478 | + ITERATE_RDEV(mddev,member,pos) { | |
22479 | + if (dev_unit(member->dev) == dev_unit(dev)) | |
22480 | + return member; | |
22481 | + } | |
22482 | + return NULL; | |
22483 | +} | |
22484 | +/* Return 1 when some member of mddev1 has the same dev_unit() as a member of mddev2, 0 otherwise. */ | |
22485 | +static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |
22486 | +{ | |
22487 | + mdk_rdev_t *member; | |
22488 | + struct list_head *pos; | |
22489 | + | |
22490 | + ITERATE_RDEV(mddev1,member,pos) { | |
22491 | + if (match_dev_unit(mddev2, member->dev) != NULL) | |
22492 | + return 1; | |
22493 | + } | |
22494 | + return 0; | |
22495 | +} | |
22496 | + | |
22497 | +/* Attach rdev to mddev: link it on the array's disk list, set rdev->mddev and bump nb_dev (and nr_raid_disks when its superblock marks it active). An rdev that is already bound is flagged with MD_BUG and left alone. */ | |
22498 | +static void bind_rdev_to_array (mdk_rdev_t * rdev, mddev_t * mddev) | |
22499 | +{ | |
22500 | + mdk_rdev_t *same_pdev; | |
22501 | + | |
22502 | + if (rdev->mddev) { | |
22503 | + MD_BUG(); | |
22504 | + return; | |
22505 | + } | |
22506 | + /* A member sharing the same device unit only produces a warning; the bind still proceeds. */ | |
22507 | + same_pdev = match_dev_unit(mddev, rdev->dev); | |
22508 | + if (same_pdev) | |
22509 | + LOG_WARNING("[md%d] WARNING: %s appears to be on the same physical disk as %s. True\n" | |
22510 | + " protection against single-disk failure might be compromised.\n", | |
22511 | + mdidx(mddev), get_partition_name(rdev),get_partition_name(same_pdev)); | |
22512 | + | |
22513 | + list_add(&rdev->same_set, &mddev->disks); | |
22514 | + rdev->mddev = mddev; | |
22515 | + mddev->nb_dev++; | |
22516 | + if (rdev->sb && disk_active(&rdev->sb->this_disk)) | |
22517 | + mddev->nr_raid_disks++; | |
22518 | + LOG_DETAILS("bind<%s,%d>\n", get_partition_name(rdev), rdev->mddev->nb_dev); | |
22519 | +} | |
22520 | +/* Detach rdev from its array: unlink it from the disk list, decrement nb_dev (and nr_raid_disks when its superblock marks it active) and clear rdev->mddev. An unbound rdev is flagged with MD_BUG. */ | |
22521 | +static void unbind_rdev_from_array (mdk_rdev_t * rdev) | |
22522 | +{ | |
22523 | + if (!rdev->mddev) { | |
22524 | + MD_BUG(); | |
22525 | + return; | |
22526 | + } | |
22527 | + list_del(&rdev->same_set); | |
22528 | + MD_INIT_LIST_HEAD(&rdev->same_set); | |
22529 | + rdev->mddev->nb_dev--; | |
22530 | + if (rdev->sb && disk_active(&rdev->sb->this_disk)) | |
22531 | + rdev->mddev->nr_raid_disks--; | |
22532 | + LOG_DETAILS("unbind<%s,%d>\n", get_partition_name(rdev), rdev->mddev->nb_dev); | |
22533 | + rdev->mddev = NULL; /* cleared last: the log line above still reads rdev->mddev */ | |
22534 | +} | |
22535 | + | |
22536 | + | |
22537 | +/* | |
22538 | + * Function: evms_md_export_rdev | |
22539 | + * EVMS MD version of export_rdev() | |
22540 | + * Discard this MD "extended" device: free its superblock page, unlink it from the global and pending lists, optionally delete its EVMS node (delete_node non-zero) and free the rdev. The rdev must no longer be used after this call. | |
22541 | + */ | |
22542 | +static void evms_md_export_rdev (mdk_rdev_t * rdev, int delete_node) | |
22543 | +{ | |
22544 | + LOG_DETAILS("%s: (%s)\n", __FUNCTION__ , get_partition_name(rdev)); | |
22545 | + if (rdev->mddev) | |
22546 | + MD_BUG(); | |
22547 | + free_disk_sb(rdev); | |
22548 | + list_del(&rdev->all); | |
22549 | + MD_INIT_LIST_HEAD(&rdev->all); | |
22550 | + if (rdev->pending.next != &rdev->pending) { | |
22551 | + LOG_WARNING("%s: (%s was pending)\n",__FUNCTION__ ,get_partition_name(rdev)); | |
22552 | + list_del(&rdev->pending); | |
22553 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
22554 | + } | |
22555 | + if (rdev->node && delete_node) { | |
22556 | + if (cur_discover_list) { /* take the node off the current discovery list before deleting it */ | |
22557 | + LOG_DETAILS("%s: remove (%s) from discover list.\n", __FUNCTION__, | |
22558 | + get_partition_name(rdev)); | |
22559 | + evms_cs_remove_logical_node_from_list(cur_discover_list, rdev->node); | |
22560 | + } | |
22561 | + LOG_DETAILS("%s: deleting node %s\n", __FUNCTION__, get_partition_name(rdev)); | |
22562 | + DELETE(rdev->node); | |
22563 | + rdev->node = NULL; | |
22564 | + } | |
22565 | + rdev->dev = 0; | |
22566 | + rdev->faulty = 0; | |
22567 | + kfree(rdev); | |
22568 | +} | |
22569 | + | |
22570 | +/* Unbind rdev from its array and then discard it together with its EVMS node. The rdev is freed by evms_md_export_rdev(), so it must not be used afterwards. */ | |
22571 | +static void kick_rdev_from_array (mdk_rdev_t * rdev) | |
22572 | +{ | |
22573 | + LOG_DEFAULT("%s: (%s)\n", __FUNCTION__,get_partition_name(rdev)); | |
22574 | + unbind_rdev_from_array(rdev); | |
22575 | + evms_md_export_rdev(rdev, TRUE); /* TRUE: also delete the backing node */ | |
22576 | +} | |
22577 | +/* Tear down the contents of an array: free the array superblock page and kick every member device out. Flags MD_BUG when nb_dev is not zero afterwards. */ | |
22578 | +static void export_array (mddev_t *mddev) | |
22579 | +{ | |
22580 | + struct list_head *tmp; | |
22581 | + mdk_rdev_t *rdev; | |
22582 | + mdp_super_t *sb = mddev->sb; | |
22583 | + | |
22584 | + LOG_DEFAULT("%s: [md%d]\n",__FUNCTION__ ,mdidx(mddev)); | |
22585 | + if (mddev->sb) { | |
22586 | + mddev->sb = NULL; | |
22587 | + free_page((unsigned long) sb); | |
22588 | + } | |
22589 | + | |
22590 | + LOG_DEBUG("%s: removing all extended devices belong to md%d\n",__FUNCTION__,mdidx(mddev)); | |
22591 | + ITERATE_RDEV(mddev,rdev,tmp) { /* NOTE(review): the body frees rdev - presumably ITERATE_RDEV advances tmp before the body runs; confirm against the macro */ | |
22592 | + if (!rdev->mddev) { | |
22593 | + MD_BUG(); | |
22594 | + continue; | |
22595 | + } | |
22596 | + kick_rdev_from_array(rdev); | |
22597 | + } | |
22598 | + if (mddev->nb_dev) | |
22599 | + MD_BUG(); | |
22600 | +} | |
22601 | +/* Destroy an mddev: export all members, reset its evms_md_size entry, wait for the resync and recovery semaphores to be free, remove it from the minor map and every list, free it, clear the owning node's back pointer, drop the module reference and release the recovery thread. */ | |
22602 | +static void free_mddev (mddev_t *mddev) | |
22603 | +{ | |
22604 | + struct evms_logical_node *node; | |
22605 | + struct evms_md *evms_md; | |
22606 | + | |
22607 | + if (!mddev) { | |
22608 | + MD_BUG(); | |
22609 | + return; | |
22610 | + } | |
22611 | + | |
22612 | + node = mddev->node; /* saved now because mddev is freed before the node is updated below */ | |
22613 | + | |
22614 | + export_array(mddev); | |
22615 | + evms_md_size[mdidx(mddev)] = 0; | |
22616 | + | |
22617 | + | |
22618 | + /* | |
22619 | + * Make sure nobody else is using this mddev | |
22620 | + * (careful, we rely on the global kernel lock here) | |
22621 | + */ | |
22622 | + while (atomic_read(&mddev->resync_sem.count) != 1) | |
22623 | + schedule(); | |
22624 | + while (atomic_read(&mddev->recovery_sem.count) != 1) | |
22625 | + schedule(); | |
22626 | + | |
22627 | + evms_md_del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev))); | |
22628 | + list_del(&mddev->all_mddevs); | |
22629 | + INIT_LIST_HEAD(&mddev->all_mddevs); | |
22630 | + if (!list_empty(&mddev->running_mddevs)) { | |
22631 | + list_del(&mddev->running_mddevs); | |
22632 | + INIT_LIST_HEAD(&mddev->running_mddevs); | |
22633 | + } | |
22634 | + if (!list_empty(&mddev->incomplete_mddevs)) { | |
22635 | + list_del(&mddev->incomplete_mddevs); | |
22636 | + INIT_LIST_HEAD(&mddev->incomplete_mddevs); | |
22637 | + } | |
22638 | + | |
22639 | + kfree(mddev); | |
22640 | + if (node) { | |
22641 | + evms_md = node->private; | |
22642 | + evms_md->mddev = NULL; /* the node outlives the mddev; code that reads this pointer must handle NULL */ | |
22643 | + } | |
22644 | + MOD_DEC_USE_COUNT; /* pairs with the MOD_INC_USE_COUNT in alloc_mddev() */ | |
22645 | + evms_md_destroy_recovery_thread(); | |
22646 | +} | |
22647 | + | |
22648 | +/* Print one disk descriptor as DISK<N:number,R:raid_disk,S:state> via printk. */ | |
22649 | +static void print_desc(mdp_disk_t *desc) | |
22650 | +{ | |
22651 | + printk(" DISK<N:%d,R:%d,S:%d>\n", desc->number, | |
22652 | + desc->raid_disk,desc->state); | |
22653 | +} | |
22654 | +/* Dump the fields of one MD superblock, selected disk descriptors and this_disk via printk. */ | |
22655 | +static void print_sb(mdp_super_t *sb) | |
22656 | +{ | |
22657 | + int i; | |
22658 | + | |
22659 | + printk(" SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", | |
22660 | + sb->major_version, sb->minor_version, sb->patch_version, | |
22661 | + sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, | |
22662 | + sb->ctime); | |
22663 | + printk(" L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level, | |
22664 | + sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor, | |
22665 | + sb->layout, sb->chunk_size); | |
22666 | + printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%x\n", | |
22667 | + sb->utime, sb->state, sb->active_disks, sb->working_disks, | |
22668 | + sb->failed_disks, sb->spare_disks, | |
22669 | + sb->sb_csum, sb->events_lo); | |
22670 | + /* only print slots with a non-zero number/major/minor/raid_disk, or a state other than 0 or 4 */ | |
22671 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
22672 | + mdp_disk_t *desc; | |
22673 | + | |
22674 | + desc = sb->disks + i; | |
22675 | + if (desc->number || desc->major || desc->minor || desc->raid_disk || (desc->state && (desc->state != 4))) { | |
22676 | + printk(" D %2d: ", i); | |
22677 | + print_desc(desc); | |
22678 | + } | |
22679 | + } | |
22680 | + printk(" THIS: "); | |
22681 | + print_desc(&sb->this_disk); | |
22682 | + | |
22683 | +} | |
22684 | +/* Print one rdev (name, size, faulty flag, desc_nr) and, if present, its superblock. */ | |
22685 | +static void print_rdev(mdk_rdev_t *rdev) | |
22686 | +{ | |
22687 | + printk("rdev %s: SZ:%08ld F:%d DN:%d ", | |
22688 | + get_partition_name(rdev), | |
22689 | + rdev->size, rdev->faulty, rdev->desc_nr); | |
22690 | + if (rdev->sb) { | |
22691 | + printk("rdev superblock:\n"); | |
22692 | + print_sb(rdev->sb); | |
22693 | + } else | |
22694 | + printk("no rdev superblock!\n"); | |
22695 | +} | |
22696 | +/* Log the complete RAID state: for each mddev, its rdev names, array superblock and every rdev. */ | |
22697 | +void evms_md_print_devices (void) | |
22698 | +{ | |
22699 | + struct list_head *tmp, *tmp2; | |
22700 | + mdk_rdev_t *rdev; | |
22701 | + mddev_t *mddev; | |
22702 | + | |
22703 | + printk("\n"); | |
22704 | + printk(": **********************************\n"); | |
22705 | + printk(": * <COMPLETE RAID STATE PRINTOUT> *\n"); | |
22706 | + printk(": **********************************\n"); | |
22707 | + ITERATE_MDDEV(mddev,tmp) { | |
22708 | + printk("md%d: ", mdidx(mddev)); | |
22709 | + | |
22710 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
22711 | + printk("<%s>", get_partition_name(rdev)); | |
22712 | + | |
22713 | + if (mddev->sb) { | |
22714 | + printk(" array superblock:\n"); | |
22715 | + print_sb(mddev->sb); | |
22716 | + } else | |
22717 | + printk(" no array superblock.\n"); | |
22718 | + | |
22719 | + ITERATE_RDEV(mddev,rdev,tmp2) | |
22720 | + print_rdev(rdev); | |
22721 | + } | |
22722 | + printk(": **********************************\n"); | |
22723 | + printk("\n"); | |
22724 | +} | |
22725 | +/* Return 1 if the generic constant words of sb1 and sb2 match (nr_disks ignored), else 0 (also on OOM). */ | |
22726 | +static int sb_equal ( mdp_super_t *sb1, mdp_super_t *sb2) | |
22727 | +{ | |
22728 | + int ret; | |
22729 | + mdp_super_t *tmp1, *tmp2; | |
22730 | + | |
22731 | + tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); | |
22732 | + tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); | |
22733 | + | |
22734 | + if (!tmp1 || !tmp2) { | |
22735 | + ret = 0; | |
22736 | + printk(KERN_INFO "md.c: out of memory comparing superblocks, treating sb1 as not equal to sb2!\n"); | |
22737 | + goto abort; | |
22738 | + } | |
22739 | + /* compare copies so nr_disks can be zeroed without modifying the superblocks passed in */ | |
22740 | + *tmp1 = *sb1; | |
22741 | + *tmp2 = *sb2; | |
22742 | + | |
22743 | + /* | |
22744 | + * nr_disks is not constant | |
22745 | + */ | |
22746 | + tmp1->nr_disks = 0; | |
22747 | + tmp2->nr_disks = 0; | |
22748 | + | |
22749 | + if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) | |
22750 | + ret = 0; | |
22751 | + else | |
22752 | + ret = 1; | |
22753 | + | |
22754 | +abort: | |
22755 | + if (tmp1) | |
22756 | + kfree(tmp1); | |
22757 | + if (tmp2) | |
22758 | + kfree(tmp2); | |
22759 | + | |
22760 | + return ret; | |
22761 | +} | |
22762 | +/* Return 1 if all four set_uuid words of the two rdev superblocks are equal, else 0. */ | |
22763 | +static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2) | |
22764 | +{ | |
22765 | + if ( (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) && | |
22766 | + (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) && | |
22767 | + (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) && | |
22768 | + (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3)) | |
22769 | + | |
22770 | + return 1; | |
22771 | + | |
22772 | + return 0; | |
22773 | +} | |
22774 | + | |
22775 | +/* | |
22776 | + * Function: evms_md_find_rdev_all | |
22777 | + * EVMS MD version of find_rdev_all() | |
22778 | + * Walk the whole all_raid_disks list looking for the rdev whose ->node is "node". | |
22779 | + * Return that MD "extended" device if found, NULL otherwise. | |
22780 | + */ | |
22781 | +static mdk_rdev_t * evms_md_find_rdev_all (struct evms_logical_node *node) | |
22782 | +{ | |
22783 | + struct list_head *tmp; | |
22784 | + mdk_rdev_t *rdev; | |
22785 | + | |
22786 | + tmp = all_raid_disks.next; | |
22787 | + while (tmp != &all_raid_disks) { | |
22788 | + rdev = list_entry(tmp, mdk_rdev_t, all); | |
22789 | + if (rdev->node == node) | |
22790 | + return rdev; | |
22791 | + tmp = tmp->next; | |
22792 | + } | |
22793 | + return NULL; | |
22794 | +} | |
22795 | + | |
22796 | +/* | |
22797 | + * Function: evms_md_find_mddev_all - return the mddev whose ->node is "node", or NULL if there is none | |
22798 | + */ | |
22799 | +static mddev_t * evms_md_find_mddev_all (struct evms_logical_node *node) | |
22800 | +{ | |
22801 | + struct list_head *tmp; | |
22802 | + mddev_t *mddev; | |
22803 | + | |
22804 | + ITERATE_MDDEV(mddev,tmp) { | |
22805 | + if (mddev->node == node) | |
22806 | + return mddev; | |
22807 | + } | |
22808 | + return NULL; | |
22809 | +} | |
22810 | + | |
22811 | + | |
22812 | +/* | |
22813 | + * Function: evms_md_write_disk_sb | |
22814 | + * EVMS MD version of write_disk_sb: returns 1 if a sanity check fails, 0 otherwise (written or skipped) | |
22815 | + */ | |
22816 | +static int evms_md_write_disk_sb(mdk_rdev_t * rdev) | |
22817 | +{ | |
22818 | + unsigned long size; | |
22819 | + u64 sb_offset_in_sectors; | |
22820 | + | |
22821 | + if (!rdev->sb) { | |
22822 | + MD_BUG(); | |
22823 | + return 1; | |
22824 | + } | |
22825 | + if (rdev->faulty) { | |
22826 | + MD_BUG(); | |
22827 | + return 1; | |
22828 | + } | |
22829 | + if (rdev->sb->md_magic != MD_SB_MAGIC) { | |
22830 | + MD_BUG(); | |
22831 | + return 1; | |
22832 | + } | |
22833 | + /* rdev->sb_offset is compared against half the sector offset (the log text calls that unit blocks) */ | |
22834 | + sb_offset_in_sectors = evms_md_calc_dev_sboffset(rdev->node, rdev->mddev, 1); | |
22835 | + if (rdev->sb_offset != (sb_offset_in_sectors >> 1)) { | |
22836 | + LOG_WARNING("%s's sb offset has changed from blocks(%ld) to blocks(%ld), skipping\n", | |
22837 | + get_partition_name(rdev), | |
22838 | + rdev->sb_offset, | |
22839 | + (unsigned long)(sb_offset_in_sectors >> 1)); | |
22840 | + goto skip; | |
22841 | + } | |
22842 | + /* | |
22843 | + * If the disk went offline meanwhile and it's just a spare, then | |
22844 | + * its size has changed to zero silently, and the MD code does | |
22845 | + * not yet know that it's faulty. | |
22846 | + */ | |
22847 | + size = evms_md_calc_dev_size(rdev->node, rdev->mddev, 1); | |
22848 | + if (size != rdev->size) { | |
22849 | + LOG_WARNING("%s's size has changed from %ld to %ld since import, skipping\n", | |
22850 | + get_partition_name(rdev), rdev->size, size); | |
22851 | + goto skip; | |
22852 | + } | |
22853 | + | |
22854 | + LOG_DETAILS("(write) %s's sb offset: "PFU64"\n",get_partition_name(rdev), sb_offset_in_sectors); | |
22855 | + /* NOTE(review): no result of INIT_IO is checked here - confirm write errors are reported elsewhere */ | |
22856 | + INIT_IO(rdev->node,WRITE,sb_offset_in_sectors,MD_SB_SECTORS,rdev->sb); | |
22857 | + | |
22858 | +skip: | |
22859 | + return 0; | |
22860 | +} | |
22861 | +/* Refresh each usable rdev superblock from the array superblock: copy, fix up this_disk, re-checksum. Returns 0. */ | |
22862 | +static int evms_md_sync_sbs(mddev_t * mddev) | |
22863 | +{ | |
22864 | + mdk_rdev_t *rdev; | |
22865 | + struct list_head *tmp; | |
22866 | + mdp_disk_t * disk; | |
22867 | + | |
22868 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22869 | + if (rdev->virtual_spare || rdev->faulty) | |
22870 | + continue; | |
22871 | + | |
22872 | + /* copy everything from the master */ | |
22873 | + memcpy(rdev->sb, mddev->sb, sizeof(mdp_super_t)); | |
22874 | + | |
22875 | + /* this_disk is per-device: take it from the descriptor evms_md_find_disk() returns, if any */ | |
22876 | +// rdev->sb->this_disk = mddev->sb->disks[rdev->desc_nr]; | |
22877 | + // use the SB disk array since if update occurred on normal shutdown | |
22878 | + // the rdevs may be out of date. | |
22879 | + disk = evms_md_find_disk(mddev, rdev->dev); | |
22880 | + if (disk) { | |
22881 | + rdev->sb->this_disk = *disk; | |
22882 | + } | |
22883 | + | |
22884 | + rdev->sb->sb_csum = calc_sb_csum(rdev->sb); | |
22885 | + } | |
22886 | + return 0; | |
22887 | +} | |
22888 | +/* Re-read each usable rdev sb, rebuild the array sb (with "clean" applied) from the first, then evms_md_update_sb(). */ | |
22889 | +static int evms_md_update_sb_sync(mddev_t * mddev, int clean) | |
22890 | +{ | |
22891 | + mdk_rdev_t *rdev; | |
22892 | + struct list_head *tmp; | |
22893 | + int rc = 0; | |
22894 | + int found = FALSE; | |
22895 | + | |
22896 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22897 | + | |
22898 | + if (rdev->virtual_spare || rdev->faulty) | |
22899 | + continue; | |
22900 | + | |
22901 | + if ((rc = evms_md_read_disk_sb(rdev))) { | |
22902 | + LOG_ERROR("%s: error reading superblock on %s!\n", | |
22903 | + __FUNCTION__, evms_md_partition_name(rdev->node)); | |
22904 | + break; | |
22905 | + } | |
22906 | + | |
22907 | + if ((rc = check_disk_sb(rdev))) { | |
22908 | + LOG_ERROR("%s: %s has invalid sb!\n", | |
22909 | + __FUNCTION__, evms_md_partition_name(rdev->node)); | |
22910 | + break; | |
22911 | + } | |
22912 | + | |
22913 | + rdev->desc_nr = rdev->sb->this_disk.number; | |
22914 | + rdev->dev = MKDEV(rdev->sb->this_disk.major, rdev->sb->this_disk.minor); | |
22915 | + | |
22916 | + /* copy master superblock from the first good rdev */ | |
22917 | + if (!found) { | |
22918 | + found = TRUE; | |
22919 | + memcpy(mddev->sb, rdev->sb, sizeof(mdp_super_t)); | |
22920 | + if (clean) | |
22921 | + mddev->sb->state |= 1 << MD_SB_CLEAN; | |
22922 | + else | |
22923 | + mddev->sb->state &= ~(1 << MD_SB_CLEAN); | |
22924 | + } | |
22925 | + } | |
22926 | + if (!rc && found) { | |
22927 | + evms_md_update_sb(mddev); | |
22928 | + } else { | |
22929 | + LOG_SERIOUS("%s: BUG! BUG! superblocks will not be updated!\n", __FUNCTION__); | |
22930 | + } | |
22931 | + return rc; /* rc is still 0 when no usable rdev was found, although nothing was updated */ | |
22932 | + | |
22933 | +} | |
22934 | +/* Bump utime and the event counter, sync all rdev sbs and write them (up to 100 passes); always returns 0. */ | |
22935 | +int evms_md_update_sb(mddev_t * mddev) | |
22936 | +{ | |
22937 | + int err, count = 100; | |
22938 | + struct list_head *tmp; | |
22939 | + mdk_rdev_t *rdev; | |
22940 | + | |
22941 | + /* every retry pass re-enters here, so utime and the event counter are bumped again */ | |
22942 | +repeat: | |
22943 | + mddev->sb->utime = CURRENT_TIME; | |
22944 | + if ((++mddev->sb->events_lo)==0) | |
22945 | + ++mddev->sb->events_hi; | |
22946 | + | |
22947 | + if ((mddev->sb->events_lo|mddev->sb->events_hi)==0) { | |
22948 | + /* | |
22949 | + * oops, this 64-bit counter should never wrap. | |
22950 | + * Either we are in around ~1 trillion A.C., assuming | |
22951 | + * 1 reboot per second, or we have a bug: | |
22952 | + */ | |
22953 | + MD_BUG(); | |
22954 | + mddev->sb->events_lo = mddev->sb->events_hi = 0xffffffff; | |
22955 | + } | |
22956 | + evms_md_sync_sbs(mddev); | |
22957 | + | |
22958 | + /* | |
22959 | + * do not write anything to disk if using | |
22960 | + * nonpersistent superblocks | |
22961 | + */ | |
22962 | + if (mddev->sb->not_persistent) | |
22963 | + return 0; | |
22964 | + | |
22965 | + LOG_DETAILS("%s: updating [md%d] superblock\n",__FUNCTION__ ,mdidx(mddev)); | |
22966 | + | |
22967 | + err = 0; | |
22968 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
22969 | + if (!rdev->virtual_spare && !rdev->faulty) { | |
22970 | + LOG_DETAILS(" %s [events: %x]", | |
22971 | + get_partition_name(rdev), | |
22972 | + rdev->sb->events_lo); | |
22973 | + err += evms_md_write_disk_sb(rdev); | |
22974 | + } else { | |
22975 | + if (rdev->faulty) | |
22976 | + LOG_DETAILS(" skipping faulty %s\n", get_partition_name(rdev)); | |
22977 | + if (rdev->virtual_spare) | |
22978 | + LOG_DETAILS(" skipping virtual spare.\n"); | |
22979 | + } | |
22980 | + } | |
22981 | + if (err) { | |
22982 | + if (--count) { | |
22983 | + LOG_WARNING("errors occurred during superblock update, repeating\n"); | |
22984 | + goto repeat; | |
22985 | + } | |
22986 | + LOG_ERROR("excessive errors occurred during superblock update, exiting\n"); | |
22987 | + } | |
22988 | + return 0; | |
22989 | +} | |
22990 | + | |
22991 | +/* | |
22992 | + * Function: evms_md_import_device | |
22993 | + * Ensure that node is not yet imported. | |
22994 | + * Read and validate the MD super block on this device | |
22995 | + * Add to the global MD "extended" devices list (all_raid_disks) | |
22996 | + * Returns 0, -EEXIST, -ENOMEM, -EINVAL or the error of a superblock helper; discover_list is not used here. | |
22997 | + */ | |
22998 | +static int evms_md_import_device (struct evms_logical_node **discover_list, | |
22999 | + struct evms_logical_node *node) | |
23000 | +{ | |
23001 | + int err; | |
23002 | + mdk_rdev_t *rdev; | |
23003 | + | |
23004 | + LOG_ENTRY_EXIT("%s: discovering %s\n",__FUNCTION__,evms_md_partition_name(node)); | |
23005 | + | |
23006 | + if (evms_md_find_rdev_all(node)) { | |
23007 | + LOG_DEBUG("%s exists\n", evms_md_partition_name(node)); | |
23008 | + return -EEXIST; | |
23009 | + } | |
23010 | + | |
23011 | + rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); | |
23012 | + if (!rdev) { | |
23013 | + LOG_ERROR("could not alloc mem for %s!\n", evms_md_partition_name(node)); | |
23014 | + return -ENOMEM; | |
23015 | + } | |
23016 | + memset(rdev, 0, sizeof(*rdev)); | |
23017 | + | |
23018 | + if ((err = alloc_disk_sb(rdev))) | |
23019 | + goto abort_free; | |
23020 | + | |
23021 | + rdev->node = node; /* set this for evms_md_read_disk_sb() */ | |
23022 | + | |
23023 | + rdev->desc_nr = -1; | |
23024 | + rdev->faulty = 0; | |
23025 | + | |
23026 | + if (!node->total_vsectors) { | |
23027 | + LOG_ERROR("%s has zero size!\n", evms_md_partition_name(node)); | |
23028 | + err = -EINVAL; | |
23029 | + goto abort_free; | |
23030 | + } | |
23031 | + | |
23032 | + if ((err = evms_md_read_disk_sb(rdev))) { | |
23033 | + LOG_EXTRA("could not read %s's sb, not importing!\n",evms_md_partition_name(node)); | |
23034 | + goto abort_free; | |
23035 | + } | |
23036 | + if ((err = check_disk_sb(rdev))) { | |
23037 | + LOG_EXTRA("%s has invalid sb, not importing!\n",evms_md_partition_name(node)); | |
23038 | + goto abort_free; | |
23039 | + } | |
23040 | + rdev->desc_nr = rdev->sb->this_disk.number; | |
23041 | + rdev->dev = MKDEV(rdev->sb->this_disk.major, rdev->sb->this_disk.minor); | |
23042 | + LOG_DETAILS("FOUND %s desc_nr(%d)\n", get_partition_name(rdev), rdev->desc_nr); | |
23043 | + list_add(&rdev->all, &all_raid_disks); | |
23044 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
23045 | + /* NOTE(review): faulty was cleared above, so this only fires if a helper set it - confirm */ | |
23046 | + if (rdev->faulty && rdev->sb) | |
23047 | + free_disk_sb(rdev); | |
23048 | + | |
23049 | + return 0; | |
23050 | + | |
23051 | +abort_free: | |
23052 | + if (rdev->sb) { | |
23053 | + free_disk_sb(rdev); | |
23054 | + } | |
23055 | + kfree(rdev); | |
23056 | + return err; | |
23057 | +} | |
23058 | + | |
23059 | + | |
23060 | + | |
23061 | +/* | |
23062 | + * Function: evms_md_analyze_sbs | |
23063 | + * EVMS MD version of analyze_sbs(): returns 0 when the superblocks of the array check out, 1 on abort | |
23064 | + */ | |
23065 | +static int evms_md_analyze_sbs (mddev_t * mddev) | |
23066 | +{ | |
23067 | + int out_of_date = 0, i; | |
23068 | + struct list_head *tmp, *tmp2; | |
23069 | + mdk_rdev_t *rdev, *rdev2, *freshest; | |
23070 | + mdp_super_t *sb; | |
23071 | + | |
23072 | + LOG_ENTRY_EXIT("Analyzing all superblocks...\n"); | |
23073 | + /* | |
23074 | + * Verify the RAID superblock on each real device | |
23075 | + */ | |
23076 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23077 | + if (rdev->faulty) { | |
23078 | + MD_BUG(); | |
23079 | + goto abort; | |
23080 | + } | |
23081 | + if (!rdev->sb) { | |
23082 | + MD_BUG(); | |
23083 | + goto abort; | |
23084 | + } | |
23085 | + if (check_disk_sb(rdev)) | |
23086 | + goto abort; | |
23087 | + } | |
23088 | + /* NOTE(review): rdevs are kicked inside ITERATE_RDEV below - presumably the macro tolerates removal; confirm */ | |
23089 | + /* | |
23090 | + * The superblock constant part has to be the same | |
23091 | + * for all disks in the array. | |
23092 | + */ | |
23093 | + sb = NULL; | |
23094 | + | |
23095 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23096 | + if (!sb) { | |
23097 | + sb = rdev->sb; | |
23098 | + continue; | |
23099 | + } | |
23100 | + if (!sb_equal(sb, rdev->sb)) { | |
23101 | + LOG_WARNING("kick out %s\n",get_partition_name(rdev)); | |
23102 | + kick_rdev_from_array(rdev); | |
23103 | + continue; | |
23104 | + } | |
23105 | + } | |
23106 | + | |
23107 | + /* | |
23108 | + * OK, we have all disks and the array is ready to run. Let's | |
23109 | + * find the freshest superblock, that one will be the superblock | |
23110 | + * that represents the whole array. | |
23111 | + */ | |
23112 | + if (!mddev->sb) | |
23113 | + if (alloc_array_sb(mddev)) | |
23114 | + goto abort; | |
23115 | + sb = mddev->sb; | |
23116 | + freshest = NULL; | |
23117 | + | |
23118 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23119 | + __u64 ev1, ev2; | |
23120 | + /* | |
23121 | + * if the checksum is invalid, use the superblock | |
23122 | + * only as a last resort. (decrease its age by | |
23123 | + * one event) | |
23124 | + */ | |
23125 | + if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) { | |
23126 | + if (rdev->sb->events_lo || rdev->sb->events_hi) | |
23127 | + if ((rdev->sb->events_lo--)==0) | |
23128 | + rdev->sb->events_hi--; | |
23129 | + } | |
23130 | + LOG_DETAILS("%s's event counter: %x\n",get_partition_name(rdev), rdev->sb->events_lo); | |
23131 | + | |
23132 | + if (!freshest) { | |
23133 | + freshest = rdev; | |
23134 | + continue; | |
23135 | + } | |
23136 | + /* | |
23137 | + * Find the newest superblock version | |
23138 | + */ | |
23139 | + ev1 = md_event(rdev->sb); | |
23140 | + ev2 = md_event(freshest->sb); | |
23141 | + if (ev1 != ev2) { | |
23142 | + out_of_date = 1; | |
23143 | + if (ev1 > ev2) | |
23144 | + freshest = rdev; | |
23145 | + } | |
23146 | + } | |
23147 | + if (out_of_date) { | |
23148 | + LOG_WARNING("OUT OF DATE, freshest: %s\n",get_partition_name(freshest)); | |
23149 | + } | |
23150 | + memcpy (sb, freshest->sb, sizeof(*sb)); | |
23151 | + | |
23152 | + /* | |
23153 | + * at this point we have picked the 'best' superblock | |
23154 | + * from all available superblocks. | |
23155 | + * now we validate this superblock and kick out possibly | |
23156 | + * failed disks. | |
23157 | + */ | |
23158 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23159 | + /* | |
23160 | + * Kick all non-fresh devices | |
23161 | + */ | |
23162 | + __u64 ev1, ev2; | |
23163 | + ev1 = md_event(rdev->sb); | |
23164 | + ev2 = md_event(sb); | |
23165 | + if (ev1 < ev2) { | |
23166 | + if (ev1) { | |
23167 | + LOG_WARNING("kicking non-fresh %s from array!\n",get_partition_name(rdev)); | |
23168 | + kick_rdev_from_array(rdev); | |
23169 | + continue; | |
23170 | + } else { | |
23171 | + LOG_DETAILS("%s is a new spare.\n",get_partition_name(rdev)); | |
23172 | + } | |
23173 | + } | |
23174 | + } | |
23175 | + | |
23176 | + /* | |
23177 | + * Remove unavailable and faulty devices ... | |
23178 | + * | |
23179 | + * note that if an array becomes completely unrunnable due to | |
23180 | + * missing devices, we do not write the superblock back, so the | |
23181 | + * administrator has a chance to fix things up. The removal thus | |
23182 | + * only happens if it's nonfatal to the contents of the array. | |
23183 | + */ | |
23184 | + for (i = 0; i < MD_SB_DISKS; i++) { | |
23185 | + int found; | |
23186 | + mdp_disk_t *desc; | |
23187 | + | |
23188 | + desc = sb->disks + i; | |
23189 | + | |
23190 | + /* | |
23191 | + * We kick faulty devices/descriptors immediately. | |
23192 | + * | |
23193 | + * Note: multipath devices are a special case. Since we | |
23194 | + * were able to read the superblock on the path, we don't | |
23195 | + * care if it was previously marked as faulty, it's up now | |
23196 | + * so enable it. | |
23197 | + */ | |
23198 | + if (disk_faulty(desc) && mddev->sb->level != -4) { | |
23199 | + found = 0; | |
23200 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23201 | + if (rdev->desc_nr != desc->number) | |
23202 | + continue; | |
23203 | + LOG_WARNING("[md%d] kicking faulty %s!\n",mdidx(mddev),get_partition_name(rdev)); | |
23204 | + kick_rdev_from_array(rdev); | |
23205 | + found = 1; | |
23206 | + break; | |
23207 | + } | |
23208 | + if (!found) { | |
23209 | + LOG_WARNING("%s: [md%d] found former faulty device [number=%d]\n", | |
23210 | + __FUNCTION__ ,mdidx(mddev), desc->number); | |
23211 | + } | |
23212 | + /* | |
23213 | + * Don't call remove_descriptor(), | |
23214 | + * let the administrator remove it from the user-land */ | |
23215 | + /* remove_descriptor(desc, sb); */ | |
23216 | + continue; | |
23217 | + } else if (disk_faulty(desc)) { | |
23218 | + /* | |
23219 | + * multipath entry marked as faulty, unfaulty it | |
23220 | + */ | |
23221 | + kdev_t dev; | |
23222 | + | |
23223 | + dev = MKDEV(desc->major, desc->minor); | |
23224 | + | |
23225 | + rdev = evms_md_find_rdev(mddev, dev); | |
23226 | + if (rdev) | |
23227 | + mark_disk_spare(desc); | |
23228 | + else { | |
23229 | + LOG_WARNING("%s: [md%d] (MULTIPATH) found former faulty device [number=%d]\n", | |
23230 | + __FUNCTION__ ,mdidx(mddev), desc->number); | |
23231 | + /* | |
23232 | + * Don't call remove_descriptor(), | |
23233 | + * let the administrator remove it from the user-land */ | |
23234 | + /* remove_descriptor(desc, sb); */ | |
23235 | + } | |
23236 | + } | |
23237 | + | |
23238 | + /* | |
23239 | + * Is this device present in the rdev ring? | |
23240 | + */ | |
23241 | + found = 0; | |
23242 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23243 | + /* | |
23244 | + * Multi-path IO special-case: since we have no | |
23245 | + * this_disk descriptor at auto-detect time, | |
23246 | + * we cannot check rdev->number. | |
23247 | + * We can check the device though. | |
23248 | + */ | |
23249 | + if ((sb->level == -4) && (rdev->dev == | |
23250 | + MKDEV(desc->major,desc->minor))) { | |
23251 | + found = 1; | |
23252 | + break; | |
23253 | + } | |
23254 | + if (rdev->desc_nr == desc->number) { | |
23255 | + found = 1; | |
23256 | + break; | |
23257 | + } | |
23258 | + } | |
23259 | + if (found) | |
23260 | + continue; | |
23261 | + | |
23262 | + LOG_WARNING(" [md%d]: former device [number=%d] is unavailable!\n", | |
23263 | + mdidx(mddev), desc->number); | |
23264 | + remove_descriptor(desc, sb); | |
23265 | + } | |
23266 | + | |
23267 | + /* | |
23268 | + * Kick all rdevs that are not in the | |
23269 | + * descriptor array: | |
23270 | + */ | |
23271 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23272 | + if (rdev->desc_nr == -1) | |
23273 | + kick_rdev_from_array(rdev); | |
23274 | + } | |
23275 | + | |
23276 | + /* | |
23277 | + * Do a final reality check. | |
23278 | + */ | |
23279 | + if (mddev->sb->level != -4) { | |
23280 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23281 | + if (rdev->desc_nr == -1) { | |
23282 | + MD_BUG(); | |
23283 | + goto abort; | |
23284 | + } | |
23285 | + /* | |
23286 | + * is the desc_nr unique? | |
23287 | + */ | |
23288 | + ITERATE_RDEV(mddev,rdev2,tmp2) { | |
23289 | + if ((rdev2 != rdev) && | |
23290 | + (rdev2->desc_nr == rdev->desc_nr)) { | |
23291 | + MD_BUG(); | |
23292 | + goto abort; | |
23293 | + } | |
23294 | + } | |
23295 | + } | |
23296 | + } | |
23297 | + /* NOTE(review): OLD_VERSION and NOT_CLEAN_IGNORE are defined here but not referenced in this function */ | |
23298 | +#define OLD_VERSION KERN_ALERT \ | |
23299 | +"md%d: unsupported raid array version %d.%d.%d\n" | |
23300 | + | |
23301 | +#define NOT_CLEAN_IGNORE KERN_ERR \ | |
23302 | +"md%d: raid array is not clean -- starting background reconstruction\n" | |
23303 | + | |
23304 | + /* | |
23305 | + * Check if we can support this RAID array | |
23306 | + */ | |
23307 | + if (sb->major_version != MD_MAJOR_VERSION || | |
23308 | + sb->minor_version > MD_MINOR_VERSION) { | |
23309 | + | |
23310 | + LOG_ERROR("[md%d] unsupported raid array version %d.%d.%d\n", | |
23311 | + mdidx(mddev), | |
23312 | + sb->major_version, | |
23313 | + sb->minor_version, | |
23314 | + sb->patch_version); | |
23315 | + goto abort; | |
23316 | + } | |
23317 | + | |
23318 | + if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) || | |
23319 | + (sb->level == 4) || (sb->level == 5))) | |
23320 | + LOG_WARNING("[md%d, level=%d] raid array is not clean -- starting background reconstruction\n", | |
23321 | + mdidx(mddev), sb->level); | |
23322 | + | |
23323 | + LOG_ENTRY_EXIT("analysis of all superblocks is OK!\n"); | |
23324 | + return 0; | |
23325 | +abort: | |
23326 | + LOG_WARNING("ABORT analyze_sbs()!!!\n"); | |
23327 | + return 1; | |
23328 | +} | |
23329 | + | |
23330 | +/* Fill in missing rdev sizes and, if still unset, evms_md_size[] for the array; returns 0 or -EINVAL. */ | |
23331 | +static int device_size_calculation (mddev_t * mddev) | |
23332 | +{ | |
23333 | + int data_disks = 0, persistent; | |
23334 | + //unsigned int readahead; | |
23335 | + mdp_super_t *sb = mddev->sb; | |
23336 | + struct list_head *tmp; | |
23337 | + mdk_rdev_t *rdev; | |
23338 | + | |
23339 | + /* | |
23340 | + * Do device size calculation. Bail out if too small. | |
23341 | + * (we have to do this after having validated chunk_size, | |
23342 | + * because device size has to be modulo chunk_size) | |
23343 | + */ | |
23344 | + persistent = !mddev->sb->not_persistent; | |
23345 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23346 | + if (rdev->faulty) | |
23347 | + continue; | |
23348 | + if (rdev->size) { | |
23349 | + LOG_DEFAULT("%s: already calculated %s\n", __FUNCTION__, get_partition_name(rdev)); | |
23350 | + continue; | |
23351 | + } | |
23352 | + rdev->size = evms_md_calc_dev_size(rdev->node, mddev, persistent); | |
23353 | + if (rdev->size < sb->chunk_size / 1024) { | |
23354 | + LOG_WARNING("Dev %s smaller than chunk_size: %ldk < %dk\n", | |
23355 | + get_partition_name(rdev), rdev->size, sb->chunk_size / 1024); | |
23356 | + return -EINVAL; | |
23357 | + } | |
23358 | + } | |
23359 | + /* data_disks is the factor applied to sb->size below to get the array size for this level */ | |
23360 | + switch (sb->level) { | |
23361 | + case -4: | |
23362 | + data_disks = 1; | |
23363 | + break; | |
23364 | + case -3: | |
23365 | + data_disks = 1; | |
23366 | + break; | |
23367 | + case -2: | |
23368 | + data_disks = 1; | |
23369 | + break; | |
23370 | + case -1: | |
23371 | + zoned_raid_size(mddev); | |
23372 | + data_disks = 1; | |
23373 | + break; | |
23374 | + case 0: | |
23375 | + zoned_raid_size(mddev); | |
23376 | + data_disks = sb->raid_disks; | |
23377 | + break; | |
23378 | + case 1: | |
23379 | + data_disks = 1; | |
23380 | + break; | |
23381 | + case 4: | |
23382 | + case 5: | |
23383 | + data_disks = sb->raid_disks-1; | |
23384 | + break; | |
23385 | + default: | |
23386 | + LOG_ERROR("[md%d] unknown level %d\n", mdidx(mddev), sb->level); | |
23387 | + goto abort; | |
23388 | + } | |
23389 | + if (!evms_md_size[mdidx(mddev)]) | |
23390 | + evms_md_size[mdidx(mddev)] = sb->size * data_disks; | |
23391 | + | |
23392 | + return 0; | |
23393 | +abort: | |
23394 | + return -EINVAL; /* same negative errno as the too-small-device path above; callers test for non-zero */ | |
23395 | +} | |
23396 | + | |
23397 | + | |
23398 | +#define TOO_BIG_CHUNKSIZE KERN_ERR \ | |
23399 | +"too big chunk_size: %d > %d\n" | |
23400 | + | |
23401 | +#define TOO_SMALL_CHUNKSIZE KERN_ERR \ | |
23402 | +"too small chunk_size: %d < %ld\n" | |
23403 | + | |
23404 | +#define BAD_CHUNKSIZE KERN_ERR \ | |
23405 | +"no chunksize specified, see 'man raidtab'\n" | |
23406 | +/* Analyze the superblocks, validate chunk size, bind the personality and run it; returns 0, -EINVAL or -EBUSY. */ | |
23407 | +static int do_md_run (mddev_t * mddev) | |
23408 | +{ | |
23409 | + int pnum, err; | |
23410 | + int chunk_size; | |
23411 | + struct list_head *tmp; | |
23412 | + mdk_rdev_t *rdev; | |
23413 | + | |
23414 | + | |
23415 | + if (!mddev->nb_dev) { | |
23416 | + MD_BUG(); | |
23417 | + return -EINVAL; | |
23418 | + } | |
23419 | + | |
23420 | + if (mddev->pers) | |
23421 | + return -EBUSY; | |
23422 | + | |
23423 | + /* | |
23424 | + * Resize disks to align partitions size on a given | |
23425 | + * chunk size. | |
23426 | + */ | |
23427 | + evms_md_size[mdidx(mddev)] = 0; | |
23428 | + | |
23429 | + /* | |
23430 | + * Analyze all RAID superblock(s) | |
23431 | + */ | |
23432 | + if (evms_md_analyze_sbs(mddev)) { | |
23433 | + MD_BUG(); | |
23434 | + return -EINVAL; | |
23435 | + } | |
23436 | + | |
23437 | + mddev->chunk_size = chunk_size = mddev->sb->chunk_size; | |
23438 | + pnum = level_to_pers(mddev->sb->level); | |
23439 | + | |
23440 | + if ((pnum != MULTIPATH) && (pnum != RAID1)) { | |
23441 | + if (!chunk_size) { | |
23442 | + /* | |
23443 | + * 'default chunksize' in the old md code used to | |
23444 | + * be PAGE_SIZE, baaad. | |
23445 | + * we abort here to be on the safe side. We dont | |
23446 | + * want to continue the bad practice. | |
23447 | + */ | |
23448 | + printk(BAD_CHUNKSIZE); | |
23449 | + return -EINVAL; | |
23450 | + } | |
23451 | + if (chunk_size > MAX_CHUNK_SIZE) { | |
23452 | + printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE); | |
23453 | + return -EINVAL; | |
23454 | + } | |
23455 | + /* | |
23456 | + * chunk-size has to be a power of 2 and multiples of PAGE_SIZE | |
23457 | + */ | |
23458 | + if ( (1 << ffz(~chunk_size)) != chunk_size) { | |
23459 | + MD_BUG(); | |
23460 | + return -EINVAL; | |
23461 | + } | |
23462 | + if (chunk_size < PAGE_SIZE) { | |
23463 | + printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE); | |
23464 | + return -EINVAL; | |
23465 | + } | |
23466 | + } else | |
23467 | + if (chunk_size) | |
23468 | + printk(KERN_INFO "RAID level %d does not need chunksize! Continuing anyway.\n", mddev->sb->level); | |
23469 | + | |
23470 | + if (pnum >= MAX_PERSONALITY) { | |
23471 | + MD_BUG(); | |
23472 | + return -EINVAL; | |
23473 | + } | |
23474 | + if (!pers[pnum]) | |
23475 | + { | |
23476 | +#ifdef CONFIG_KMOD | |
23477 | + char module_name[80]; | |
23478 | + sprintf (module_name, "md-personality-%d", pnum); | |
23479 | + request_module (module_name); | |
23480 | + if (!pers[pnum]) | |
23481 | +#endif | |
23482 | + { | |
23483 | + printk(KERN_ERR "personality %d is not loaded!\n", | |
23484 | + pnum); | |
23485 | + return -EINVAL; | |
23486 | + } | |
23487 | + } | |
23488 | + if (device_size_calculation(mddev)) | |
23489 | + return -EINVAL; | |
23490 | + | |
23491 | + /* | |
23492 | + * Drop all container device buffers, from now on | |
23493 | + * the only valid external interface is through the md | |
23494 | + * device. | |
23495 | + * Also find largest hardsector size | |
23496 | + */ | |
23497 | + md_hardsect_sizes[mdidx(mddev)] = 512; | |
23498 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23499 | + if (rdev->faulty) | |
23500 | + continue; | |
23501 | + invalidate_device(rdev->dev, 1); | |
23502 | +/* if (get_hardsect_size(rdev->dev) | |
23503 | + > md_hardsect_sizes[mdidx(mddev)]) | |
23504 | + md_hardsect_sizes[mdidx(mddev)] = | |
23505 | + get_hardsect_size(rdev->dev); */ | |
23506 | + if (rdev->node->hardsector_size > md_hardsect_sizes[mdidx(mddev)]) { | |
23507 | + md_hardsect_sizes[mdidx(mddev)] = rdev->node->hardsector_size; | |
23508 | + } | |
23509 | + | |
23510 | + } | |
23511 | + md_blocksizes[mdidx(mddev)] = 1024; | |
23512 | + if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) | |
23513 | + md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; | |
23514 | + | |
23515 | + mddev->pers = pers[pnum]; | |
23516 | + | |
23517 | + err = mddev->pers->run(mddev); | |
23518 | + if (err) { | |
23519 | + LOG_WARNING("%s: pers->run() failed.\n", __FUNCTION__); | |
23520 | + mddev->pers = NULL; | |
23521 | + return -EINVAL; | |
23522 | + } | |
23523 | + mddev->sb->state &= ~(1 << MD_SB_CLEAN); | |
23524 | + | |
23525 | + evms_md_update_sb(mddev); | |
23526 | + | |
23527 | + if (incomplete_mddev(mddev)) { | |
23528 | + LOG_DEFAULT("%s: [md%d] was incomplete!\n", __FUNCTION__, mdidx(mddev)); | |
23529 | + list_del(&mddev->incomplete_mddevs); | |
23530 | + INIT_LIST_HEAD(&mddev->incomplete_mddevs); | |
23531 | + } | |
23532 | + | |
23533 | + list_add(&mddev->running_mddevs, &running_mddevs); | |
23534 | + | |
23535 | + return (0); | |
23536 | +} | |
23537 | +/* NOTE(review): TOO_SMALL_CHUNKSIZE is defined above but is not #undef'd with the other two */ | |
23538 | +#undef TOO_BIG_CHUNKSIZE | |
23539 | +#undef BAD_CHUNKSIZE | |
23540 | + | |
23541 | + | |
23542 | +#define OUT(x) do { err = (x); goto out; } while (0) | |
23543 | + | |
23544 | + | |
23545 | +#define STILL_MOUNTED KERN_WARNING \ | |
23546 | +"md%d still mounted.\n" | |
23547 | +#define STILL_IN_USE \ | |
23548 | +"md%d still in use.\n" | |
23549 | +/* Stop array 'mddev' (ro == 0) or switch it to read-only (ro != 0); returns 0 or a negative errno. */ | |
23550 | +static int do_md_stop (mddev_t * mddev, int ro) | |
23551 | +{ | |
23552 | + int err = 0, resync_interrupted = 0, clean = 0; | |
23553 | + kdev_t dev = mddev_to_kdev(mddev); | |
23554 | + | |
23555 | + if (atomic_read(&mddev->active)>1) { /* refuse while the active count is above one */ | |
23556 | + printk(STILL_IN_USE, mdidx(mddev)); | |
23557 | + OUT(-EBUSY); | |
23558 | + } | |
23559 | + | |
23560 | + if (mddev->pers) { | |
23561 | + /* | |
23562 | + * It is safe to call stop here, it only frees private | |
23563 | + * data. Also, it tells us if a device is unstoppable | |
23564 | + * (eg. resyncing is in progress) | |
23565 | + */ | |
23566 | + if (mddev->pers->stop_resync) | |
23567 | + if (mddev->pers->stop_resync(mddev)) | |
23568 | + resync_interrupted = 1; /* remembered so the sb is not marked clean below */ | |
23569 | + | |
23570 | + if (mddev->recovery_running) | |
23571 | + evms_cs_interrupt_thread(evms_md_recovery_thread); | |
23572 | + | |
23573 | + /* | |
23574 | + * This synchronizes with signal delivery to the | |
23575 | + * resync or reconstruction thread. It also nicely | |
23576 | + * hangs the process if some reconstruction has not | |
23577 | + * finished. | |
23578 | + */ | |
23579 | + down(&mddev->recovery_sem); | |
23580 | + up(&mddev->recovery_sem); | |
23581 | + | |
23582 | + invalidate_device(dev, 1); | |
23583 | + | |
23584 | + if (ro) { | |
23585 | + if (mddev->ro) | |
23586 | + OUT(-ENXIO); /* already read-only */ | |
23587 | + mddev->ro = 1; | |
23588 | + mddev->node->plugin = &md_plugin_header; /* NOTE(review): mddev->node is not checked for NULL here */ | |
23589 | + } else { | |
23590 | + if (mddev->ro) | |
23591 | + set_device_ro(dev, 0); | |
23592 | + if (mddev->pers->stop(mddev)) { | |
23593 | + if (mddev->ro) | |
23594 | + set_device_ro(dev, 1); /* stop failed: restore the read-only setting */ | |
23595 | + OUT(-EBUSY); | |
23596 | + } | |
23597 | + if (mddev->ro) | |
23598 | + mddev->ro = 0; | |
23599 | + } | |
23600 | + if (mddev->sb) { | |
23601 | + /* | |
23602 | + * mark it clean only if there was no resync | |
23603 | + * interrupted. | |
23604 | + */ | |
23605 | + if (!mddev->recovery_running && !resync_interrupted) { | |
23606 | + LOG_DEBUG("%s: marking sb clean...\n", __FUNCTION__); | |
23607 | + clean = 1; | |
23608 | + } | |
23609 | + evms_md_update_sb_sync(mddev, clean); | |
23610 | + } | |
23611 | + if (ro) | |
23612 | + set_device_ro(dev, 1); | |
23613 | + } | |
23614 | + | |
23615 | + /* | |
23616 | + * Free resources if final stop (this part also runs when mddev->pers is NULL) | |
23617 | + */ | |
23618 | + if (!ro) { | |
23619 | + printk (KERN_INFO "md%d stopped.\n", mdidx(mddev)); | |
23620 | + free_mddev(mddev); | |
23621 | + | |
23622 | + } else | |
23623 | + printk (KERN_INFO | |
23624 | + "md%d switched to read-only mode.\n", mdidx(mddev)); | |
23625 | +out: | |
23626 | + return err; | |
23627 | +} | |
23628 | + | |
23629 | + | |
23630 | +/* Start 'mddev' with do_md_run(). On success its member nodes are taken off | |
23631 | + * discover_list and an MD node is created; returns 0 or the error code. */ | |
23632 | +static int evms_md_run_array (struct evms_logical_node ** discover_list, mddev_t *mddev) | |
23633 | +{ | |
23634 | +	struct list_head *pos; | |
23635 | +	mdk_rdev_t *member; | |
23636 | +	uint node_flags = 0; | |
23637 | +	int rc; | |
23638 | + | |
23639 | +	/* an array with an empty disk list cannot be started */ | |
23640 | +	if (mddev->disks.prev == &mddev->disks) { | |
23641 | +		MD_BUG(); | |
23642 | +		return -EINVAL; | |
23643 | +	} | |
23644 | + | |
23645 | +	LOG_DETAILS("%s: trying to run array md%d\n", __FUNCTION__,mdidx(mddev) ); | |
23646 | +	ITERATE_RDEV(mddev,member,pos) { | |
23647 | +		LOG_DETAILS(" <%s>\n", get_partition_name(member)); | |
23648 | +	} | |
23649 | + | |
23650 | +	rc = do_md_run (mddev); | |
23651 | +	if (rc) { | |
23652 | +		LOG_WARNING("%s: could not start [md%d] containing: \n",__FUNCTION__,mdidx(mddev)); | |
23653 | +		ITERATE_RDEV(mddev,member,pos) { | |
23654 | +			LOG_WARNING(" (%s, desc_nr=%d)\n", get_partition_name(member), member->desc_nr); | |
23655 | +		} | |
23656 | +		LOG_WARNING("%s: will try restart [md%d] again later.\n",__FUNCTION__,mdidx(mddev)); | |
23657 | +		mddev->sb_dirty = 0; | |
23658 | +		return rc; | |
23659 | +	} | |
23660 | + | |
23661 | +	/* remove all nodes consumed by this md device from the discover list */ | |
23662 | +	ITERATE_RDEV(mddev,member,pos) { | |
23663 | +		LOG_DETAILS(" removing %s from discover list.\n", get_partition_name(member)); | |
23664 | +		evms_cs_remove_logical_node_from_list(discover_list,member->node); | |
23665 | +		node_flags |= member->node->flags; | |
23666 | +	} | |
23667 | + | |
23668 | +	rc = evms_md_create_logical_node(discover_list,mddev,node_flags); | |
23669 | +	if (!rc) | |
23670 | +		exported_nodes++; | |
23671 | +	return rc; | |
23672 | +} | |
23673 | + | |
23674 | +/* Try to run an array from the incomplete list; it is unlinked from that list in every case. */ | |
23675 | +static void evms_md_run_incomplete_array (struct evms_logical_node ** discover_list, mddev_t *mddev) | |
23676 | +{ | |
23677 | +	mdk_rdev_t *stacked; | |
23678 | + | |
23679 | +	LOG_DEFAULT("%s [md%d]\n", | |
23680 | +		__FUNCTION__, mdidx(mddev)); | |
23681 | +	if (evms_md_run_array(discover_list,mddev) != 0) | |
23682 | +		goto unlink; | |
23683 | + | |
23684 | +	/* | |
23685 | +	 * The MD device is running. Try to read an MD superblock from the newly | |
23686 | +	 * created MD node; finding one probably means MD arrays are stacked. | |
23687 | +	 */ | |
23688 | +	if (!mddev->node || | |
23689 | +	    (evms_md_import_device(discover_list,mddev->node) != 0)) | |
23690 | +		goto unlink; | |
23691 | + | |
23692 | +	stacked = evms_md_find_rdev_all(mddev->node); | |
23693 | +	if (!stacked) { | |
23694 | +		LOG_WARNING("%s: imported %s but no rdev was found!\n", | |
23695 | +			__FUNCTION__, | |
23696 | +			evms_md_partition_name(mddev->node)); | |
23697 | +		goto unlink; | |
23698 | +	} | |
23699 | +	list_add(&stacked->pending, &pending_raid_disks); | |
23700 | +	evms_md_run_devices(discover_list); | |
23701 | +unlink: | |
23702 | +	if (incomplete_mddev(mddev)) { | |
23703 | +		list_del(&mddev->incomplete_mddevs); | |
23704 | +		INIT_LIST_HEAD(&mddev->incomplete_mddevs); | |
23705 | +	} | |
23706 | +} | |
23707 | + | |
23708 | +/* | |
23709 | + * Let's try to run arrays based on all disks that have arrived | |
23710 | + * until now. (those are in the ->pending list) | |
23711 | + * | |
23712 | + * The method: pick the first pending disk, collect all disks with | |
23713 | + * the same UUID and an equal superblock, and move them from the | |
23714 | + * pending list to the local 'candidates' list. Bind the candidates | |
23715 | + * to the mddev for the superblock's md_minor, then either run the | |
23716 | + * array or park it on the incomplete_mddevs list. | |
23717 | + * | |
23718 | + * An mddev that is already running is left alone (EVMS re-discovery). | |
23719 | + */ | |
23720 | +static void evms_md_run_devices (struct evms_logical_node **discover_list) | |
23721 | +{ | |
23722 | + struct list_head candidates; | |
23723 | + struct list_head *tmp; | |
23724 | + mdk_rdev_t *rdev0, *rdev; | |
23725 | + mddev_t *mddev; | |
23726 | + kdev_t md_kdev; | |
23727 | + | |
23728 | + | |
23729 | + LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__); | |
23730 | + while (pending_raid_disks.next != &pending_raid_disks) { /* until the pending list is empty */ | |
23731 | + rdev0 = list_entry(pending_raid_disks.next, | |
23732 | + mdk_rdev_t, pending); | |
23733 | + MD_INIT_LIST_HEAD(&candidates); | |
23734 | + ITERATE_RDEV_PENDING(rdev,tmp) { | |
23735 | + if (uuid_equal(rdev0, rdev)) { | |
23736 | + if (!sb_equal(rdev0->sb, rdev->sb)) { | |
23737 | + LOG_DETAILS("%s has same UUID as %s, but superblocks differ ...\n",\ | |
23738 | + get_partition_name(rdev),get_partition_name(rdev0)); | |
23739 | + continue; /* stays on the pending list for a later pass */ | |
23740 | + } | |
23741 | + list_del(&rdev->pending); | |
23742 | + list_add(&rdev->pending, &candidates); | |
23743 | + } | |
23744 | + } | |
23745 | + | |
23746 | + /* | |
23747 | + * now we have a set of devices, with all of them having | |
23748 | + * mostly sane superblocks. It's time to allocate the | |
23749 | + * mddev. | |
23750 | + */ | |
23751 | + md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor); | |
23752 | + mddev = kdev_to_mddev(md_kdev); | |
23753 | + if (mddev && (!incomplete_mddev(mddev))) { | |
23754 | + LOG_DETAILS("md%d already running, cannot run %s\n", | |
23755 | + mdidx(mddev), get_partition_name(rdev0)); | |
23756 | + | |
23757 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23758 | + /* | |
23759 | + * This is EVMS re-discovery! | |
23760 | + * Remove all nodes consumed by this md device from the discover list | |
23761 | + */ | |
23762 | + evms_cs_remove_logical_node_from_list(discover_list,rdev->node); | |
23763 | + } | |
23764 | + | |
23765 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { | |
23766 | + if (evms_md_find_mddev_all(rdev->node)) | |
23767 | + /* | |
23768 | + * We have found an MD superblock on top of a running MD array. | |
23769 | + * Delete rdev but keep the MD array. | |
23770 | + */ | |
23771 | + evms_md_export_rdev(rdev, FALSE); | |
23772 | + else | |
23773 | + evms_md_export_rdev(rdev, TRUE); | |
23774 | + } | |
23775 | + continue; | |
23776 | + } | |
23777 | + | |
23778 | + if (!mddev) { | |
23779 | + mddev = alloc_mddev(md_kdev); | |
23780 | + if (mddev == NULL) { | |
23781 | + LOG_ERROR("cannot allocate memory for md drive.\n"); | |
23782 | + break; /* NOTE(review): rdevs already moved to the on-stack 'candidates' list stay linked to it */ | |
23783 | + } | |
23784 | + LOG_DETAILS("created md%d\n", mdidx(mddev)); | |
23785 | + } else { | |
23786 | + LOG_DETAILS("%s: found INCOMPLETE md%d\n", __FUNCTION__, mdidx(mddev)); | |
23787 | + } | |
23788 | + | |
23789 | + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { | |
23790 | + bind_rdev_to_array(rdev, mddev); | |
23791 | + list_del(&rdev->pending); | |
23792 | + MD_INIT_LIST_HEAD(&rdev->pending); | |
23793 | + } | |
23794 | + | |
23795 | + if ((mddev->nr_raid_disks >= rdev0->sb->raid_disks) || | |
23796 | + (mddev->nb_dev == rdev0->sb->nr_disks)) { | |
23797 | + evms_md_run_array(discover_list,mddev); /* return value is not checked here */ | |
23798 | + } else { | |
23799 | + LOG_DETAILS("THIS md%d IS INCOMPLETE, found %d devices, need %d\n", | |
23800 | + mdidx(mddev), mddev->nr_raid_disks, rdev0->sb->raid_disks); | |
23801 | + list_add(&mddev->incomplete_mddevs, &incomplete_mddevs); | |
23802 | + ITERATE_RDEV(mddev,rdev,tmp) { | |
23803 | + evms_cs_remove_logical_node_from_list(discover_list,rdev->node); | |
23804 | + } | |
23805 | + } | |
23806 | + } | |
23807 | + LOG_ENTRY_EXIT("%s: EXIT\n", __FUNCTION__); | |
23808 | +} | |
23809 | +/* Wake the MD recovery thread; flags MD_BUG() if the thread was never created. */ | |
23810 | +void evms_md_recover_arrays(void) | |
23811 | +{ | |
23812 | +	if (evms_md_recovery_thread) { | |
23813 | +		evms_cs_wakeup_thread(evms_md_recovery_thread); | |
23814 | +		return; | |
23815 | +	} | |
23816 | +	MD_BUG(); | |
23817 | +} | |
23818 | + | |
23819 | +int evms_md_error_dev( | |
23820 | + mddev_t *mddev, | |
23821 | + kdev_t dev) | |
23822 | +{ | |
23823 | + mdk_rdev_t * rdev; | |
23824 | + | |
23825 | + rdev = evms_md_find_rdev(mddev, dev); | |
23826 | + if (rdev) { | |
23827 | + return evms_md_error(mddev,rdev->node); | |
23828 | + } else { | |
23829 | + LOG_ERROR("%s: could not find %s in md%d\n", | |
23830 | + __FUNCTION__, org_partition_name(dev), mdidx(mddev)); | |
23831 | + return 0; | |
23832 | + } | |
23833 | +} | |
23834 | + | |
23835 | +/* | |
23836 | + * Handle an error reported for 'node', a member of array 'mddev'. | |
23837 | + * Returns 1 if the personality's error_handler returned > 0, otherwise 0. | |
23838 | + */ | |
23839 | +int evms_md_error( | |
23840 | +	mddev_t *mddev, | |
23841 | +	struct evms_logical_node *node) | |
23842 | +{ | |
23843 | +	mdk_rdev_t * rrdev; | |
23844 | + | |
23845 | +	/* both pointers are dereferenced below (mdidx(mddev), node->name) */ | |
23846 | +	if (!mddev || !node) { | |
23847 | +		MD_BUG(); | |
23848 | +		return 0; | |
23849 | +	} | |
23850 | +	LOG_ERROR("evms_md_error dev:(md%d), node:(%s), (caller: %p,%p,%p,%p).\n", | |
23851 | +		mdidx(mddev), node->name, | |
23852 | +		__builtin_return_address(0),__builtin_return_address(1), | |
23853 | +		__builtin_return_address(2),__builtin_return_address(3)); | |
23854 | +	rrdev = evms_md_find_rdev_from_node(mddev, node); | |
23855 | +	if (!rrdev || rrdev->faulty) | |
23856 | +		return 0; | |
23857 | +	/* an array without a personality is treated like one without an error_handler */ | |
23858 | +	if (mddev->pers && mddev->pers->error_handler | |
23859 | +	    && mddev->pers->error_handler(mddev,node) > 0) | |
23860 | +		return 1; | |
23861 | +	free_disk_sb(rrdev); | |
23862 | +	rrdev->faulty = 1; | |
23863 | +	/* if recovery was running, stop it now. */ | |
23864 | +	if (mddev->pers && mddev->pers->stop_resync) | |
23865 | +		mddev->pers->stop_resync(mddev); | |
23866 | +	if (mddev->recovery_running) | |
23867 | +		evms_cs_interrupt_thread(evms_md_recovery_thread); | |
23868 | +	evms_md_recover_arrays(); | |
23869 | +	return 0; | |
23870 | +} | |
23871 | + | |
23872 | +/* Register personality 'p' in slot 'pnum'. Returns 0, -EBUSY if the slot is taken, or -EINVAL | |
23873 | + * for a NULL 'p' or a slot outside 0..MAX_PERSONALITY-1 (pnum is signed, so negatives are rejected too). */ | |
23874 | +int evms_register_md_personality (int pnum, mdk_personality_t *p) | |
23875 | +{ | |
23876 | +	if (pnum < 0 || pnum >= MAX_PERSONALITY || !p) { | |
23877 | +		MD_BUG(); | |
23878 | +		return -EINVAL; | |
23879 | +	} | |
23880 | +	if (pers[pnum]) { | |
23881 | +		MD_BUG(); | |
23882 | +		return -EBUSY; | |
23883 | +	} | |
23884 | +	pers[pnum] = p; | |
23885 | +	LOG_DETAILS("%s personality registered as nr %d\n",p->name, pnum); | |
23886 | +	return 0; | |
23887 | +} | |
23888 | +/* Unregister the personality in slot 'pnum'. Returns 0, or -EINVAL when the slot number is | |
23889 | + * out of range or the slot is empty (an empty slot must not be dereferenced for its name). */ | |
23890 | +int evms_unregister_md_personality (int pnum) | |
23891 | +{ | |
23892 | +	if (pnum < 0 || pnum >= MAX_PERSONALITY || !pers[pnum]) { | |
23893 | +		MD_BUG(); | |
23894 | +		return -EINVAL; | |
23895 | +	} | |
23896 | +	printk(KERN_INFO "%s personality unregistered\n", pers[pnum]->name); | |
23897 | +	pers[pnum] = NULL; | |
23898 | +	return 0; | |
23899 | +} | |
23900 | +/* Return the superblock descriptor of the first non-faulty, non-active member of 'mddev', or NULL if there is none. */ | |
23901 | +mdp_disk_t *evms_md_get_spare(mddev_t *mddev) | |
23902 | +{ | |
23903 | +	mdp_super_t *sb = mddev->sb; | |
23904 | +	mdp_disk_t *disk; | |
23905 | +	mdk_rdev_t *rdev; | |
23906 | +	int i, j; | |
23907 | +	/* i is capped at MD_SB_DISKS so a stale nb_dev cannot loop forever (assumes lookup is by desc_nr - TODO confirm) */ | |
23908 | +	for (i = 0, j = 0; j < mddev->nb_dev && i < MD_SB_DISKS; i++) { | |
23909 | +		rdev = evms_md_find_rdev_nr(mddev, i); | |
23910 | +		if (rdev == NULL) | |
23911 | +			continue; | |
23912 | +		j++; | |
23913 | +		if (rdev->faulty) | |
23914 | +			continue; | |
23915 | +		if (!rdev->sb) { | |
23916 | +			if (!rdev->virtual_spare) | |
23917 | +				MD_BUG(); | |
23918 | +			continue; | |
23919 | +		} | |
23920 | +		disk = &sb->disks[rdev->desc_nr]; | |
23921 | +		if (disk_faulty(disk)) { | |
23922 | +			MD_BUG(); | |
23923 | +			continue; | |
23924 | +		} | |
23925 | +		if (disk_active(disk)) | |
23926 | +			continue; | |
23927 | +		return disk; | |
23928 | +	} | |
23929 | +	return NULL; | |
23930 | +} | |
23931 | + | |
23932 | +/* Find the superblock descriptor whose major/minor equal those of 'dev'; NULL when none of the MD_SB_DISKS slots match. */ | |
23933 | +static mdp_disk_t *evms_md_find_disk(mddev_t *mddev, kdev_t dev) | |
23934 | +{ | |
23935 | +	mdp_disk_t *slot = &mddev->sb->disks[0]; | |
23936 | +	int left; | |
23937 | + | |
23938 | +	for (left = MD_SB_DISKS; left > 0; left--, slot++) { | |
23939 | +		if ((slot->major != MAJOR(dev)) || (slot->minor != MINOR(dev))) | |
23940 | +			continue; | |
23941 | +		return slot; | |
23942 | +	} | |
23943 | +	return NULL; | |
23944 | +} | |
23945 | + | |
23946 | +/* Sectors accounted through evms_md_sync_acct(), indexed by [major][disk_index]. */ | |
23947 | +static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK]; | |
23948 | + | |
23949 | +/* Add nr_sectors to the sync_io counter of 'dev'; devices outside the table bounds are ignored. */ | |
23950 | +void evms_md_sync_acct( | |
23951 | +	kdev_t dev, | |
23952 | +	unsigned long nr_sectors) | |
23953 | +{ | |
23954 | +	unsigned int maj = MAJOR(dev); | |
23955 | +	unsigned int slot = disk_index(dev); | |
23956 | + | |
23957 | +	if ((slot < DK_MAX_DISK) && (maj < DK_MAX_MAJOR)) | |
23958 | +		sync_io[maj][slot] += nr_sectors; | |
23959 | +} | |
23960 | + | |
23961 | +/* Return 1 if no member of 'mddev' saw more than 32 blocks of non-sync I/O since its last_events | |
23962 | + * snapshot, else 0. A member that exceeded the threshold gets its last_events snapshot refreshed. */ | |
23963 | +static int is_mddev_idle(mddev_t *mddev) | |
23964 | +{ | |
23965 | +	struct list_head *pos; | |
23966 | +	mdk_rdev_t *member; | |
23967 | +	unsigned long events; | |
23968 | +	int quiet = 1; | |
23969 | + | |
23970 | +	ITERATE_RDEV(mddev,member,pos) { | |
23971 | +		int maj = MAJOR(member->dev); | |
23972 | +		int slot = disk_index(member->dev); | |
23973 | +		/* skip devices the kstat / sync_io tables do not cover */ | |
23974 | +		if ((slot >= DK_MAX_DISK) || (maj >= DK_MAX_MAJOR)) | |
23975 | +			continue; | |
23976 | +		events = kstat.dk_drive_rblk[maj][slot] + | |
23977 | +			kstat.dk_drive_wblk[maj][slot] ; | |
23978 | +		events -= sync_io[maj][slot]; | |
23979 | +		if ((events - member->last_events) <= 32) | |
23980 | +			continue; | |
23981 | +		member->last_events = events; | |
23982 | +		quiet = 0; | |
23983 | +	} | |
23984 | +	return quiet; | |
23985 | +} | |
23986 | + | |
23987 | +MD_DECLARE_WAIT_QUEUE_HEAD(evms_resync_wait); | |
23988 | + | |
23989 | +/* Completion callback for resync I/O: subtracts 'blocks' from mddev->recovery_active | |
23990 | + * and wakes waiters on mddev->recovery_wait. 'ok' is currently not acted upon. */ | |
23991 | +void evms_md_done_sync(mddev_t *mddev, int blocks, int ok) | |
23992 | +{ | |
23993 | +	/* another "blocks" (512byte) blocks have been synced */ | |
23994 | +	atomic_sub(blocks, &mddev->recovery_active); | |
23995 | +	wake_up(&mddev->recovery_wait); | |
23996 | +	/* TODO: when !ok, stop recovery and signal do_sync */ | |
23997 | +} | |
23998 | +/* Resync 'mddev' by calling pers->sync_request() for sectors 0..sb->size<<1, throttled by the speed limits; returns 0 or a negative error. 'spare' is not used in the body. */ | |
23999 | +#define SYNC_MARKS 10 | |
24000 | +#define SYNC_MARK_STEP (3*HZ) | |
24001 | +int evms_md_do_sync(mddev_t *mddev, mdp_disk_t *spare) | |
24002 | +{ | |
24003 | + mddev_t *mddev2; | |
24004 | + unsigned int max_sectors, currspeed, | |
24005 | + j, window, err, serialize; /* NOTE(review): err is unsigned but is assigned negative errno values */ | |
24006 | + unsigned long mark[SYNC_MARKS]; | |
24007 | + unsigned long mark_cnt[SYNC_MARKS]; | |
24008 | + int last_mark,m; | |
24009 | + struct list_head *tmp; | |
24010 | + unsigned long last_check; | |
24011 | + | |
24012 | + | |
24013 | + err = down_interruptible(&mddev->resync_sem); | |
24014 | + if (err) | |
24015 | + goto out_nolock; | |
24016 | + | |
24017 | +recheck: | |
24018 | + serialize = 0; | |
24019 | + ITERATE_MDDEV(mddev2,tmp) { | |
24020 | + if (mddev2 == mddev) | |
24021 | + continue; | |
24022 | + if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) { | |
24023 | + LOG_DEFAULT("delaying resync of md%d until md%d " | |
24024 | + "has finished resync (they share one or more physical units)\n", | |
24025 | + mdidx(mddev), mdidx(mddev2)); | |
24026 | + serialize = 1; | |
24027 | + break; | |
24028 | + } | |
24029 | + } | |
24030 | + if (serialize) { | |
24031 | + interruptible_sleep_on(&evms_resync_wait); /* woken by the wake_up() at the end of another evms_md_do_sync() */ | |
24032 | + if (md_signal_pending(current)) { | |
24033 | + md_flush_signals(); | |
24034 | + err = -EINTR; | |
24035 | + goto out; /* NOTE(review): 'out' uses recovery_wait before init_waitqueue_head() below has run in this call */ | |
24036 | + } | |
24037 | + goto recheck; | |
24038 | + } | |
24039 | + | |
24040 | + mddev->curr_resync = 1; | |
24041 | + | |
24042 | + max_sectors = mddev->sb->size<<1; | |
24043 | + | |
24044 | + LOG_DEFAULT("syncing RAID array md%d\n", mdidx(mddev)); | |
24045 | + LOG_DEFAULT("minimum _guaranteed_ reconstruction speed: %d KB/sec/disc.\n", | |
24046 | + sysctl_speed_limit_min); | |
24047 | + LOG_DEFAULT("using maximum available idle IO bandwith " | |
24048 | + "(but not more than %d KB/sec) for reconstruction.\n", | |
24049 | + sysctl_speed_limit_max); | |
24050 | + | |
24051 | + /* | |
24052 | + * Resync has low priority. | |
24053 | + */ | |
24054 | +#ifdef O1_SCHEDULER | |
24055 | + set_user_nice(current,19); | |
24056 | +#else | |
24057 | + current->nice = 19; | |
24058 | +#endif | |
24059 | + | |
24060 | + is_mddev_idle(mddev); /* this also initializes IO event counters */ | |
24061 | + for (m = 0; m < SYNC_MARKS; m++) { | |
24062 | + mark[m] = jiffies; | |
24063 | + mark_cnt[m] = 0; | |
24064 | + } | |
24065 | + last_mark = 0; | |
24066 | + mddev->resync_mark = mark[last_mark]; | |
24067 | + mddev->resync_mark_cnt = mark_cnt[last_mark]; | |
24068 | + | |
24069 | + /* | |
24070 | + * Tune reconstruction: | |
24071 | + */ | |
24072 | + window = MD_READAHEAD*(PAGE_SIZE/512); | |
24073 | + LOG_DEFAULT("using %dk window, over a total of %d blocks.\n", | |
24074 | + window/2,max_sectors/2); | |
24075 | + | |
24076 | + atomic_set(&mddev->recovery_active, 0); | |
24077 | + init_waitqueue_head(&mddev->recovery_wait); | |
24078 | + last_check = 0; | |
24079 | + for (j = 0; j < max_sectors;) { /* j is advanced by the sector count each sync_request() returns */ | |
24080 | + int sectors; | |
24081 | + | |
24082 | + sectors = mddev->pers->sync_request(mddev, j); | |
24083 | + | |
24084 | + if (sectors < 0) { | |
24085 | + err = sectors; | |
24086 | + goto out; | |
24087 | + } | |
24088 | + atomic_add(sectors, &mddev->recovery_active); | |
24089 | + j += sectors; | |
24090 | + mddev->curr_resync = j; | |
24091 | + | |
24092 | + if (last_check + window > j) /* run the checks below only once per 'window' sectors */ | |
24093 | + continue; | |
24094 | + | |
24095 | + last_check = j; | |
24096 | + | |
24097 | + run_task_queue(&tq_disk); | |
24098 | + | |
24099 | + repeat: | |
24100 | + if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) { /* NOTE(review): plain >= is not jiffies-wrap safe; consider time_after_eq() */ | |
24101 | + /* step marks */ | |
24102 | + int next = (last_mark+1) % SYNC_MARKS; | |
24103 | + | |
24104 | + mddev->resync_mark = mark[next]; | |
24105 | + mddev->resync_mark_cnt = mark_cnt[next]; | |
24106 | + mark[next] = jiffies; | |
24107 | + mark_cnt[next] = j - atomic_read(&mddev->recovery_active); | |
24108 | + last_mark = next; | |
24109 | + } | |
24110 | + | |
24111 | + | |
24112 | + if (md_signal_pending(current)) { | |
24113 | + /* | |
24114 | + * got a signal, exit. | |
24115 | + */ | |
24116 | + mddev->curr_resync = 0; | |
24117 | + LOG_DEFAULT("evms_md_do_sync() got signal ... exiting\n"); | |
24118 | + md_flush_signals(); | |
24119 | + err = -EINTR; | |
24120 | + goto out; | |
24121 | + } | |
24122 | + | |
24123 | + /* | |
24124 | + * this loop exits only if either when we are slower than | |
24125 | + * the 'hard' speed limit, or the system was IO-idle for | |
24126 | + * a jiffy. | |
24127 | + * the system might be non-idle CPU-wise, but we only care | |
24128 | + * about not overloading the IO subsystem. (things like an | |
24129 | + * e2fsck being done on the RAID array should execute fast) | |
24130 | + */ | |
24131 | + if (md_need_resched(current)) | |
24132 | + schedule(); | |
24133 | + | |
24134 | + currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1; | |
24135 | + | |
24136 | + if (currspeed > sysctl_speed_limit_min) { | |
24137 | +#ifdef O1_SCHEDULER | |
24138 | + set_user_nice(current,19); | |
24139 | +#else | |
24140 | + current->nice = 19; | |
24141 | +#endif | |
24142 | + | |
24143 | + if ((currspeed > sysctl_speed_limit_max) || | |
24144 | + !is_mddev_idle(mddev)) { | |
24145 | +#ifdef O1_SCHEDULER | |
24146 | + set_current_state(TASK_INTERRUPTIBLE); | |
24147 | +#else | |
24148 | + current->state = TASK_INTERRUPTIBLE; | |
24149 | +#endif | |
24150 | + md_schedule_timeout(HZ/4); | |
24151 | + goto repeat; | |
24152 | + } | |
24153 | + } else /* at or below the minimum speed: the statement selected by the #ifdef below is the else body */ | |
24154 | +#ifdef O1_SCHEDULER | |
24155 | + set_user_nice(current,-20); | |
24156 | +#else | |
24157 | + current->nice = -20; | |
24158 | +#endif | |
24159 | + } | |
24160 | + LOG_DEFAULT("md%d: sync done.\n",mdidx(mddev)); | |
24161 | + err = 0; | |
24162 | + /* | |
24163 | + * this also signals 'finished resyncing' to md_stop | |
24164 | + */ | |
24165 | +out: | |
24166 | + wait_event(mddev->recovery_wait, atomic_read(&mddev->recovery_active)==0); /* wait for outstanding sync requests */ | |
24167 | + up(&mddev->resync_sem); | |
24168 | +out_nolock: | |
24169 | + mddev->curr_resync = 0; | |
24170 | + wake_up(&evms_resync_wait); | |
24171 | + return err; | |
24172 | +} | |
24173 | + | |
24174 | + | |
24175 | + | |
24176 | +/* | |
24177 | + * This is a kernel thread which syncs a spare disk with the active array | |
24178 | + * | |
24179 | + * the amount of foolproofing might seem to be a tad excessive, but an | |
24180 | + * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs | |
24181 | + * of my root partition with the first 0.5 gigs of my /home partition ... so | |
24182 | + * i'm a bit nervous ;) | |
24183 | + */ | |
24184 | +void evms_md_do_recovery(void *data) | |
24185 | +{ | |
24186 | + int err; | |
24187 | + mddev_t *mddev; | |
24188 | + mdp_super_t *sb; | |
24189 | + mdp_disk_t *spare; | |
24190 | + struct list_head *tmp; | |
24191 | + | |
24192 | + LOG_DEFAULT("recovery thread got woken up ...\n"); | |
24193 | +restart: | |
24194 | + ITERATE_MDDEV(mddev,tmp) { | |
24195 | + | |
24196 | + sb = mddev->sb; | |
24197 | + if (!sb) | |
24198 | + continue; | |
24199 | + if (mddev->recovery_running) | |
24200 | + continue; | |
24201 | + if (sb->active_disks == sb->raid_disks) /* nothing to rebuild */ | |
24202 | + continue; | |
24203 | + if (!sb->spare_disks) { | |
24204 | + LOG_ERROR(" [md%d] no spare disk to reconstruct array! " | |
24205 | + "-- continuing in degraded mode\n", mdidx(mddev)); | |
24206 | + continue; | |
24207 | + } | |
24208 | + | |
24209 | + spare = NULL; | |
24210 | + | |
24211 | + if (!spare) { /* always true: spare was set to NULL just above */ | |
24212 | + /* | |
24213 | + * now here we get the spare and resync it. | |
24214 | + */ | |
24215 | + spare = evms_md_get_spare(mddev); | |
24216 | + } | |
24217 | + if (!spare) | |
24218 | + continue; | |
24219 | + | |
24220 | + LOG_DEFAULT(" [md%d] resyncing spare disk %s to replace failed disk\n", | |
24221 | + mdidx(mddev), org_partition_name(MKDEV(spare->major,spare->minor))); | |
24222 | + if (!mddev->pers->diskop) /* NOTE(review): mddev->pers itself is not checked for NULL */ | |
24223 | + continue; | |
24224 | + | |
24225 | + if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) | |
24226 | + continue; | |
24227 | + | |
24228 | + down(&mddev->recovery_sem); | |
24229 | + mddev->recovery_running = 1; | |
24230 | + err = evms_md_do_sync(mddev, spare); | |
24231 | + if (err == -EIO) { | |
24232 | + LOG_DEFAULT("[md%d] spare disk %s failed, skipping to next spare.\n", | |
24233 | + mdidx(mddev), org_partition_name(MKDEV(spare->major,spare->minor))); | |
24234 | + if (!disk_faulty(spare)) { | |
24235 | + mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE); | |
24236 | + mark_disk_faulty(spare); | |
24237 | + mark_disk_nonsync(spare); | |
24238 | + mark_disk_inactive(spare); | |
24239 | + sb->spare_disks--; | |
24240 | + sb->working_disks--; | |
24241 | + sb->failed_disks++; | |
24242 | + } | |
24243 | + } else | |
24244 | + if (disk_faulty(spare)) | |
24245 | + mddev->pers->diskop(mddev, &spare, | |
24246 | + DISKOP_SPARE_INACTIVE); | |
24247 | + if (err == -EINTR || err == -ENOMEM) { | |
24248 | + /* | |
24249 | + * Recovery got interrupted, or ran out of mem ... | |
24250 | + * signal back that we have finished using the array. | |
24251 | + */ | |
24252 | + mddev->pers->diskop(mddev, &spare, | |
24253 | + DISKOP_SPARE_INACTIVE); | |
24254 | + up(&mddev->recovery_sem); | |
24255 | + mddev->recovery_running = 0; /* NOTE(review): cleared after up() here, but before up() in the else branch */ | |
24256 | + continue; | |
24257 | + } else { | |
24258 | + mddev->recovery_running = 0; | |
24259 | + up(&mddev->recovery_sem); | |
24260 | + } | |
24261 | + if (!disk_faulty(spare)) { | |
24262 | + /* | |
24263 | + * the SPARE_ACTIVE diskop possibly changes the | |
24264 | + * pointer too | |
24265 | + */ | |
24266 | + mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); | |
24267 | + mark_disk_sync(spare); | |
24268 | + mark_disk_active(spare); | |
24269 | + sb->active_disks++; | |
24270 | + sb->spare_disks--; | |
24271 | + } | |
24272 | + mddev->sb_dirty = 1; | |
24273 | + evms_md_update_sb(mddev); | |
24274 | + goto restart; /* rescan all arrays from the start after a superblock update */ | |
24275 | + } | |
24276 | + LOG_DEFAULT("recovery thread finished ...\n"); | |
24277 | + | |
24278 | +} | |
24279 | + | |
24280 | +static void evms_md_create_recovery_thread(void) | |
24281 | +{ | |
24282 | +	static char * name = "evms_mdrecoveryd"; | |
24283 | + | |
24284 | +	if (evms_md_recovery_thread) | |
24285 | +		return;	/* thread already exists */ | |
24286 | +	evms_md_recovery_thread = evms_cs_register_thread(evms_md_do_recovery, NULL, name); | |
24287 | +	if (evms_md_recovery_thread) | |
24288 | +		return; | |
24289 | +	LOG_SERIOUS("%s: evms_cs_recovery_thread failed\n", __FUNCTION__); | |
24290 | +} | |
24291 | + | |
24292 | +static void evms_md_destroy_recovery_thread(void) | |
24293 | +{ | |
24294 | +	if (!evms_md_recovery_thread || MOD_IN_USE) | |
24295 | +		return; | |
24296 | +	/* no thread users left: unregister the MD recovery thread */ | |
24297 | +	evms_cs_unregister_thread(evms_md_recovery_thread); | |
24298 | +	evms_md_recovery_thread = NULL; | |
24299 | +} | |
24300 | + | |
24301 | +/** | |
24302 | + * evms_md_create_logical_node - allocate an EVMS node for 'mddev', add it to discover_list and store it in mddev->node; returns 0 or an error code | |
24303 | + **/ | |
24304 | +static int evms_md_create_logical_node( | |
24305 | + struct evms_logical_node **discover_list, | |
24306 | + mddev_t *mddev, | |
24307 | + uint flags) | |
24308 | +{ | |
24309 | + int rc; | |
24310 | + struct evms_md *evms_md = NULL; | |
24311 | + struct evms_logical_node *newnode = NULL; | |
24312 | + struct evms_plugin_header *hdr = NULL; | |
24313 | + struct evms_plugin_fops *fops = NULL; | |
24314 | + | |
24315 | + rc = evms_cs_allocate_logical_node(&newnode); | |
24316 | + if (!rc) { | |
24317 | + evms_md = kmalloc(sizeof(*evms_md), GFP_KERNEL); | |
24318 | + if (!evms_md) { | |
24319 | + rc = -ENOMEM; | |
24320 | + } else { | |
24321 | + | |
24322 | + memset(evms_md,0,sizeof(*evms_md)); | |
24323 | + evms_md->mddev = mddev; | |
24324 | + | |
24325 | + fops = kmalloc(sizeof(*fops), GFP_KERNEL); | |
24326 | + if (fops) { | |
24327 | + /* copy MD plugin header | |
24328 | + * copy function table | |
24329 | + * replace read and write function pointers. | |
24330 | + */ | |
24331 | + evms_md->instance_plugin_hdr = md_plugin_header; | |
24332 | + memcpy(fops, &md_fops, sizeof(*fops)); | |
24333 | + fops->read = mddev->pers->read; | |
24334 | + fops->write = mddev->pers->write; | |
24335 | + evms_md->instance_plugin_hdr.fops = fops; | |
24336 | + hdr = &evms_md->instance_plugin_hdr; | |
24337 | + } else { | |
24338 | + LOG_WARNING("%s: No memory to copy function table\n",__FUNCTION__); | |
24339 | + rc = 0; /* clear rc and continue */ | |
24340 | + hdr = &md_plugin_header; /* fall back to the shared header, without the per-personality read/write */ | |
24341 | + } | |
24342 | + } | |
24343 | + } | |
24344 | + | |
24345 | + if (!rc && hdr) { | |
24346 | + memset(newnode,0,sizeof(*newnode)); /* NOTE(review): clears whatever evms_cs_allocate_logical_node() set up */ | |
24347 | + newnode->plugin = hdr; | |
24348 | + newnode->total_vsectors = (u64)evms_md_size[mdidx(mddev)] * 2; | |
24349 | + newnode->block_size = md_blocksizes[mdidx(mddev)]; | |
24350 | + newnode->hardsector_size = md_hardsect_sizes[mdidx(mddev)]; | |
24351 | + sprintf(newnode->name,"md/md%d",mdidx(mddev)); | |
24352 | + newnode->private = evms_md; | |
24353 | + newnode->flags = flags; | |
24354 | + | |
24355 | + rc = evms_cs_add_logical_node_to_list(discover_list, newnode); | |
24356 | + if (rc) { | |
24357 | + LOG_ERROR("%s: could not add md node %s\n", __FUNCTION__, newnode->name); | |
24358 | + } else { | |
24359 | + LOG_DEBUG("%s: added [%s] to discover list (total_vsectors="PFU64")\n", | |
24360 | + __FUNCTION__, newnode->name, newnode->total_vsectors); | |
24361 | + } | |
24362 | + } | |
24363 | + | |
24364 | + if (!rc) { | |
24365 | + mddev->node = newnode; | |
24366 | + } else { /* failure: release everything allocated above */ | |
24367 | + if (evms_md) { | |
24368 | + if (fops) | |
24369 | + kfree(fops); | |
24370 | + kfree(evms_md); | |
24371 | + } | |
24372 | + if (newnode) | |
24373 | + evms_cs_deallocate_logical_node(newnode); | |
24374 | + } | |
24375 | + return rc; | |
24376 | +} | |
24377 | + | |
24378 | + | |
24379 | +/* | |
24380 | + * Function: evms_md_autostart_arrays | |
24381 | + * Discover MD "extended" devices | |
24382 | + * Add MD "extended" devices to pending list for further processing | |
24383 | + */ | |
24384 | +static void evms_md_autostart_arrays (struct evms_logical_node **discover_list) | |
24385 | +{ | |
24386 | + struct evms_logical_node *node, *next_node; | |
24387 | + mdk_rdev_t *rdev; | |
24388 | + int rc=0; | |
24389 | + | |
24390 | + LOG_ENTRY_EXIT(":autostart_arrays() ENTRY\n"); | |
24391 | + | |
24392 | + /* examine each node on the discover list */ | |
24393 | + next_node = *discover_list; | |
24394 | + while(next_node) { | |
24395 | + node = next_node; | |
24396 | + next_node = node->next; /* fetched first: 'node' may be removed from the list below */ | |
24397 | + | |
24398 | + rc = evms_md_import_device(discover_list, node); | |
24399 | + if (rc && (rc != -EEXIST)) { | |
24400 | + LOG_EXTRA("autostart_arrrays() Not %s!\n",evms_md_partition_name(node)); | |
24401 | + continue; | |
24402 | + } | |
24403 | + | |
24404 | + /* | |
24405 | + * Sanity checks: | |
24406 | + */ | |
24407 | + rdev = evms_md_find_rdev_all(node); | |
24408 | + if (!rdev) { | |
24409 | + LOG_ERROR("find_rdev_all() failed\n"); | |
24410 | + continue; | |
24411 | + } | |
24412 | + if (rdev->faulty) { | |
24413 | + MD_BUG(); | |
24414 | + continue; | |
24415 | + } | |
24416 | + | |
24417 | + if (!rc) { | |
24418 | + list_add(&rdev->pending, &pending_raid_disks); /* newly imported: queue for evms_md_run_devices() */ | |
24419 | + } else if (rc == -EEXIST) { | |
24420 | + struct evms_logical_node *md_node; | |
24421 | + /* | |
24422 | + * Must be in a re-discovery process here. | |
24423 | + * Find the EVMS MD node that this rdev is a member of | |
24424 | + */ | |
24425 | + if (rdev->mddev) { | |
24426 | + md_node = rdev->mddev->node; | |
24427 | + if (md_node) { | |
24428 | + rc = evms_cs_add_logical_node_to_list(discover_list,md_node); /* rc is reused; its import result is no longer needed */ | |
24429 | + switch (rc) { | |
24430 | + case 0: | |
24431 | + exported_nodes++; | |
24432 | + LOG_DETAILS("Added MD node (%s) to discover list\n", | |
24433 | + md_node->name); | |
24434 | + break; | |
24435 | + case 1: /* already on the list */ | |
24436 | + case 2: /* already on the list */ | |
24437 | + break; | |
24438 | + default: | |
24439 | + LOG_WARNING("could not add md node (%s), rc=%d\n", | |
24440 | + md_node->name, rc); | |
24441 | + } | |
24442 | + } else { | |
24443 | + LOG_ERROR("This MD device [md%d] does not have an EVMS logical node.\n", | |
24444 | + rdev->mddev->__minor); | |
24445 | + } | |
24446 | + } else { | |
24447 | + LOG_ERROR("This device [%s] does not belong to any array!\n", | |
24448 | + get_partition_name(rdev)); | |
24449 | + evms_md_export_rdev(rdev, TRUE); | |
24450 | + } | |
24451 | + evms_cs_remove_logical_node_from_list(discover_list,node); /* NOTE(review): confirm 'node' is still valid after evms_md_export_rdev(rdev, TRUE) */ | |
24452 | + } | |
24453 | + } | |
24454 | + | |
24455 | + evms_md_run_devices(discover_list); | |
24456 | + LOG_DETAILS("EVMD MD:autostart_arrays() EXIT (exported_nodes=%d)\n",exported_nodes); | |
24457 | +} | |
24458 | + | |
24459 | +#ifdef CONFIG_PROC_FS | |
24460 | +static int status_resync(char * page, off_t * offset, int count, mddev_t * mddev) | |
24461 | +{ | |
24462 | + int sz = 0; | |
24463 | + off_t off = *offset; | |
24464 | + unsigned long max_blocks, resync, res, dt, db, rt; | |
24465 | + | |
24466 | + resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; | |
24467 | + max_blocks = mddev->sb->size; | |
24468 | + | |
24469 | + /* | |
24470 | + * Should not happen. | |
24471 | + */ | |
24472 | + if (!max_blocks) { | |
24473 | + MD_BUG(); | |
24474 | + return 0; | |
24475 | + } | |
24476 | + res = (resync/1024)*1000/(max_blocks/1024 + 1); | |
24477 | + { | |
24478 | + int i, x = res/50, y = 20-x; | |
24479 | + PROCPRINT("["); | |
24480 | + for (i = 0; i < x; i++) | |
24481 | + PROCPRINT("="); | |
24482 | + sz += sprintf(page + sz, ">"); | |
24483 | + for (i = 0; i < y; i++) | |
24484 | + PROCPRINT("."); | |
24485 | + PROCPRINT("] "); | |
24486 | + } | |
24487 | + if (!mddev->recovery_running) | |
24488 | + /* | |
24489 | + * true resync | |
24490 | + */ | |
24491 | + PROCPRINT(" resync =%3lu.%lu%% (%lu/%lu)", | |
24492 | + res/10, res % 10, resync, max_blocks); | |
24493 | + else | |
24494 | + /* | |
24495 | + * recovery ... | |
24496 | + */ | |
24497 | + PROCPRINT(" recovery =%3lu.%lu%% (%lu/%lu)", | |
24498 | + res/10, res % 10, resync, max_blocks); | |
24499 | + | |
24500 | + /* | |
24501 | + * We do not want to overflow, so the order of operands and | |
24502 | + * the * 100 / 100 trick are important. We do a +1 to be | |
24503 | + * safe against division by zero. We only estimate anyway. | |
24504 | + * | |
24505 | + * dt: time from mark until now | |
24506 | + * db: blocks written from mark until now | |
24507 | + * rt: remaining time | |
24508 | + */ | |
24509 | + dt = ((jiffies - mddev->resync_mark) / HZ); | |
24510 | + if (!dt) dt++; | |
24511 | + db = resync - (mddev->resync_mark_cnt/2); | |
24512 | + rt = (dt * ((max_blocks-resync) / (db/100+1)))/100; | |
24513 | + | |
24514 | + PROCPRINT(" finish=%lu.%lumin", rt / 60, (rt % 60)/6); | |
24515 | + | |
24516 | + PROCPRINT(" speed=%ldK/sec", db/dt); | |
24517 | + | |
24518 | +out: | |
24519 | + *offset = off; | |
24520 | + return sz; | |
24521 | +} | |
24522 | + | |
24523 | +static int evms_md_status_read_proc(char *page, char **start, off_t off, | |
24524 | + int count, int *eof, void *data) | |
24525 | +{ | |
24526 | + int sz = 0, j, size; | |
24527 | + struct list_head *tmp, *tmp2; | |
24528 | + mdk_rdev_t *rdev; | |
24529 | + mddev_t *mddev; | |
24530 | + | |
24531 | + PROCPRINT("Enterprise Volume Management System: MD Status\n"); | |
24532 | + PROCPRINT("Personalities : "); | |
24533 | + for (j = 0; j < MAX_PERSONALITY; j++) | |
24534 | + if (pers[j]) | |
24535 | + PROCPRINT("[%s] ", pers[j]->name); | |
24536 | + | |
24537 | + PROCPRINT("\n"); | |
24538 | + | |
24539 | + | |
24540 | + ITERATE_MDDEV(mddev,tmp) { | |
24541 | + PROCPRINT("md%d : %sactive", mdidx(mddev), | |
24542 | + mddev->pers ? "" : "in"); | |
24543 | + if (mddev->pers) { | |
24544 | + if (mddev->ro) | |
24545 | + PROCPRINT(" (read-only)"); | |
24546 | + PROCPRINT(" %s", mddev->pers->name); | |
24547 | + } | |
24548 | + | |
24549 | + size = 0; | |
24550 | + ITERATE_RDEV(mddev,rdev,tmp2) { | |
24551 | + PROCPRINT(" %s[%d]", | |
24552 | + rdev->node->name, rdev->desc_nr); | |
24553 | + if (rdev->faulty) { | |
24554 | + PROCPRINT("(F)"); | |
24555 | + continue; | |
24556 | + } | |
24557 | + size += rdev->size; | |
24558 | + } | |
24559 | + | |
24560 | + if (mddev->nb_dev) { | |
24561 | + if (mddev->pers) | |
24562 | + PROCPRINT("\n "PFU64" blocks", | |
24563 | + mddev->node->total_vsectors >> 1); | |
24564 | + else | |
24565 | + PROCPRINT("\n %d blocks", size); | |
24566 | + } | |
24567 | + | |
24568 | + if (!mddev->pers) { | |
24569 | + PROCPRINT("\n"); | |
24570 | + continue; | |
24571 | + } | |
24572 | + | |
24573 | + sz += mddev->pers->status (page+sz, mddev); | |
24574 | + | |
24575 | + PROCPRINT("\n "); | |
24576 | + if (mddev->curr_resync) { | |
24577 | + sz += status_resync (page+sz, &off, count, mddev); | |
24578 | + } else { | |
24579 | + if (atomic_read(&mddev->resync_sem.count) != 1) | |
24580 | + PROCPRINT(" resync=DELAYED"); | |
24581 | + } | |
24582 | + | |
24583 | + PROCPRINT("\n"); | |
24584 | + } | |
24585 | + *eof = 1; | |
24586 | +out: | |
24587 | + *start = page + off; | |
24588 | + sz -= off; | |
24589 | + if (sz < 0) | |
24590 | + sz = 0; | |
24591 | + return sz > count ? count : sz; | |
24592 | +} | |
24593 | +#endif | |
24594 | + | |
24595 | +/* Function: md_core_init | |
24596 | + */ | |
24597 | +int __init md_core_init(void) | |
24598 | +{ | |
24599 | +#ifdef CONFIG_PROC_FS | |
24600 | + struct proc_dir_entry *evms_proc_dir; | |
24601 | +#endif | |
24602 | + | |
24603 | +#ifdef CONFIG_PROC_FS | |
24604 | + evms_proc_dir = evms_cs_get_evms_proc_dir(); | |
24605 | + if (evms_proc_dir) { | |
24606 | + create_proc_read_entry("mdstat", 0, evms_proc_dir, evms_md_status_read_proc, NULL); | |
24607 | + } | |
24608 | + md_table_header = register_sysctl_table(dev_dir_table, 1); | |
24609 | +#endif | |
24610 | + | |
24611 | + return evms_cs_register_plugin(&md_plugin_header); | |
24612 | +} | |
24613 | + | |
24614 | +static void __exit md_core_exit(void) | |
24615 | +{ | |
24616 | +#ifdef CONFIG_PROC_FS | |
24617 | + struct proc_dir_entry *evms_proc_dir; | |
24618 | + | |
24619 | + evms_proc_dir = evms_cs_get_evms_proc_dir(); | |
24620 | + if (evms_proc_dir) { | |
24621 | + remove_proc_entry("mdstat", evms_proc_dir); | |
24622 | + } | |
24623 | + unregister_sysctl_table(md_table_header); | |
24624 | +#endif | |
24625 | + evms_cs_unregister_plugin(&md_plugin_header); | |
24626 | +} | |
24627 | + | |
24628 | +module_init(md_core_init); | |
24629 | +module_exit(md_core_exit); | |
24630 | +#ifdef MODULE_LICENSE | |
24631 | +MODULE_LICENSE("GPL"); | |
24632 | +#endif | |
24633 | + | |
24634 | +/* | |
24635 | + * In order to have the coexistence of this EVMS plugin and the original MD | |
24636 | + * module, the symbols exported by this plugin are prefixed with "evms_" | |
24637 | + */ | |
24638 | + | |
24639 | +MD_EXPORT_SYMBOL(evms_md_size); | |
24640 | +MD_EXPORT_SYMBOL(evms_register_md_personality); | |
24641 | +MD_EXPORT_SYMBOL(evms_unregister_md_personality); | |
24642 | + /* Export the following function for use with rdev->node in evms_md_k.h */ | |
24643 | +MD_EXPORT_SYMBOL(evms_md_partition_name); | |
24644 | + /* Export the following function for use with disks[] in md_p.h */ | |
24645 | +MD_EXPORT_SYMBOL(evms_md_error); | |
24646 | +MD_EXPORT_SYMBOL(evms_md_error_dev); | |
24647 | +MD_EXPORT_SYMBOL(evms_md_update_sb); | |
24648 | +MD_EXPORT_SYMBOL(evms_md_find_rdev_nr); | |
24649 | +MD_EXPORT_SYMBOL(evms_md_find_rdev); | |
24650 | +MD_EXPORT_SYMBOL(evms_md_find_rdev_from_node); | |
24651 | +MD_EXPORT_SYMBOL(evms_md_print_devices); | |
24652 | +MD_EXPORT_SYMBOL(evms_mddev_map); | |
24653 | +MD_EXPORT_SYMBOL(evms_md_check_ordering); | |
24654 | +MD_EXPORT_SYMBOL(evms_md_partial_sync_io); | |
24655 | +MD_EXPORT_SYMBOL(evms_md_sync_io); | |
24656 | +MD_EXPORT_SYMBOL(evms_md_do_sync); | |
24657 | +MD_EXPORT_SYMBOL(evms_md_sync_acct); | |
24658 | +MD_EXPORT_SYMBOL(evms_md_done_sync); | |
24659 | +MD_EXPORT_SYMBOL(evms_md_recover_arrays); | |
24660 | +MD_EXPORT_SYMBOL(evms_md_get_spare); | |
24661 | + | |
24662 | diff -Naur linux-2002-09-30/drivers/evms/md_linear.c evms-2002-09-30/drivers/evms/md_linear.c | |
24663 | --- linux-2002-09-30/drivers/evms/md_linear.c Wed Dec 31 18:00:00 1969 | |
24664 | +++ evms-2002-09-30/drivers/evms/md_linear.c Thu Aug 15 13:50:12 2002 | |
24665 | @@ -0,0 +1,285 @@ | |
24666 | +/* | |
24667 | + linear.c : Multiple Devices driver for Linux | |
24668 | + Copyright (C) 1994-96 Marc ZYNGIER | |
24669 | + <zyngier@ufr-info-p7.ibp.fr> or | |
24670 | + <maz@gloups.fdn.fr> | |
24671 | + | |
24672 | + Linear mode management functions. | |
24673 | + | |
24674 | + This program is free software; you can redistribute it and/or modify | |
24675 | + it under the terms of the GNU General Public License as published by | |
24676 | + the Free Software Foundation; either version 2, or (at your option) | |
24677 | + any later version. | |
24678 | + | |
24679 | + You should have received a copy of the GNU General Public License | |
24680 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
24681 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24682 | +*/ | |
24683 | + | |
24684 | +#include <linux/module.h> | |
24685 | +#include <linux/evms/evms_md.h> | |
24686 | +#include <linux/evms/evms_linear.h> | |
24687 | +#include <linux/slab.h> | |
24688 | + | |
24689 | + | |
24690 | +#define MAJOR_NR MD_MAJOR | |
24691 | +#define MD_DRIVER | |
24692 | +#define MD_PERSONALITY | |
24693 | + | |
24694 | +#define LOG_PREFIX "md linear: " | |
24695 | +static int linear_run (mddev_t *mddev) | |
24696 | +{ | |
24697 | + linear_conf_t *conf; | |
24698 | + struct linear_hash *table; | |
24699 | + mdk_rdev_t *rdev; | |
24700 | + int size, i, j, nb_zone; | |
24701 | + unsigned int curr_offset; | |
24702 | + | |
24703 | + MOD_INC_USE_COUNT; | |
24704 | + | |
24705 | + conf = kmalloc (sizeof (*conf), GFP_KERNEL); | |
24706 | + if (!conf) | |
24707 | + goto out; | |
24708 | + mddev->private = conf; | |
24709 | + | |
24710 | + if (evms_md_check_ordering(mddev)) { | |
24711 | + printk("linear: disks are not ordered, aborting!\n"); | |
24712 | + goto out; | |
24713 | + } | |
24714 | + | |
24715 | + /* | |
24716 | + * Find the smallest device. | |
24717 | + */ | |
24718 | + | |
24719 | + conf->smallest = NULL; | |
24720 | + curr_offset = 0; | |
24721 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
24722 | + dev_info_t *disk = conf->disks + j; | |
24723 | + disk->node = rdev->node; | |
24724 | + disk->dev = rdev->dev; | |
24725 | + disk->size = rdev->size; | |
24726 | + disk->offset = curr_offset; | |
24727 | + | |
24728 | + curr_offset += disk->size; | |
24729 | + | |
24730 | + if (!conf->smallest || (disk->size < conf->smallest->size)) | |
24731 | + conf->smallest = disk; | |
24732 | + } | |
24733 | + | |
24734 | + nb_zone = conf->nr_zones = evms_md_size[mdidx(mddev)] / conf->smallest->size + | |
24735 | + ((evms_md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0); | |
24736 | + | |
24737 | + conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone, | |
24738 | + GFP_KERNEL); | |
24739 | + if (!conf->hash_table) | |
24740 | + goto out; | |
24741 | + | |
24742 | + /* | |
24743 | + * Here we generate the linear hash table | |
24744 | + */ | |
24745 | + table = conf->hash_table; | |
24746 | + i = 0; | |
24747 | + size = 0; | |
24748 | + for (j = 0; j < mddev->nb_dev; j++) { | |
24749 | + dev_info_t *disk = conf->disks + j; | |
24750 | + | |
24751 | + if (size < 0) { | |
24752 | + table[-1].dev1 = disk; | |
24753 | + } | |
24754 | + size += disk->size; | |
24755 | + | |
24756 | + while (size>0) { | |
24757 | + table->dev0 = disk; | |
24758 | + table->dev1 = NULL; | |
24759 | + size -= conf->smallest->size; | |
24760 | + table++; | |
24761 | + } | |
24762 | + } | |
24763 | + if (table-conf->hash_table != nb_zone) | |
24764 | + BUG(); | |
24765 | + LOG_DETAILS("%s: nr_zones=%d, smallest=%lu\n", | |
24766 | + __FUNCTION__, conf->nr_zones, conf->smallest->size); | |
24767 | + return 0; | |
24768 | + | |
24769 | +out: | |
24770 | + if (conf) | |
24771 | + kfree(conf); | |
24772 | + MOD_DEC_USE_COUNT; | |
24773 | + return 1; | |
24774 | +} | |
24775 | + | |
24776 | +static int linear_stop (mddev_t *mddev) | |
24777 | +{ | |
24778 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
24779 | + | |
24780 | + kfree(conf->hash_table); | |
24781 | + kfree(conf); | |
24782 | + | |
24783 | + MOD_DEC_USE_COUNT; | |
24784 | + | |
24785 | + return 0; | |
24786 | +} | |
24787 | + | |
24788 | +/* | |
24789 | + * Function: linear_map | |
24790 | + */ | |
24791 | +static int linear_map( | |
24792 | + mddev_t *mddev, | |
24793 | + struct evms_logical_node **node, | |
24794 | + struct buffer_head *bh) | |
24795 | +{ | |
24796 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
24797 | + struct linear_hash *hash; | |
24798 | + dev_info_t *tmp_dev; | |
24799 | + unsigned long block; | |
24800 | + | |
24801 | + block = (bh->b_rsector >> 1); | |
24802 | + hash = conf->hash_table + (block / conf->smallest->size); | |
24803 | + if (block >= (hash->dev0->size + hash->dev0->offset)) { | |
24804 | + if (!hash->dev1) { | |
24805 | + LOG_ERROR("%s: hash->dev1==NULL for block %ld\n", __FUNCTION__, block); | |
24806 | + return -ENXIO; | |
24807 | + } | |
24808 | + tmp_dev = hash->dev1; | |
24809 | + } else | |
24810 | + tmp_dev = hash->dev0; | |
24811 | + | |
24812 | + if ( (block + (bh->b_size >> 10)) > (tmp_dev->size + tmp_dev->offset) | |
24813 | + || block < tmp_dev->offset) { | |
24814 | + LOG_ERROR("%s: Block %ld out of bounds on node %s size %ld offset %ld\n", | |
24815 | + __FUNCTION__, | |
24816 | + block, | |
24817 | + tmp_dev->node->name, | |
24818 | + tmp_dev->size, | |
24819 | + tmp_dev->offset); | |
24820 | + return -ENXIO; | |
24821 | + } | |
24822 | + bh->b_rsector -= (tmp_dev->offset << 1); | |
24823 | + *node = tmp_dev->node; | |
24824 | + return 0; | |
24825 | +} | |
24826 | + | |
24827 | +static void linear_read( | |
24828 | + struct evms_logical_node *md_node, | |
24829 | + struct buffer_head *bh) | |
24830 | +{ | |
24831 | + mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node); | |
24832 | + struct evms_logical_node *node; | |
24833 | + | |
24834 | + if (evms_md_check_boundary(md_node, bh)) return; | |
24835 | + | |
24836 | + if (!linear_map(mddev, &node, bh)) { | |
24837 | + R_IO(node, bh); | |
24838 | + } else { | |
24839 | + bh->b_end_io(bh, 0); | |
24840 | + } | |
24841 | +} | |
24842 | + | |
24843 | +static void linear_write( | |
24844 | + struct evms_logical_node *md_node, | |
24845 | + struct buffer_head *bh) | |
24846 | +{ | |
24847 | + mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node); | |
24848 | + struct evms_logical_node *node; | |
24849 | + | |
24850 | + if (evms_md_check_boundary(md_node, bh)) return; | |
24851 | + | |
24852 | + if (!linear_map(mddev, &node, bh)) { | |
24853 | + W_IO(node, bh); | |
24854 | + } else { | |
24855 | + bh->b_end_io(bh, 0); | |
24856 | + } | |
24857 | +} | |
24858 | + | |
24859 | +static int linear_status (char *page, mddev_t *mddev) | |
24860 | +{ | |
24861 | + int sz = 0; | |
24862 | + | |
24863 | +#undef MD_DEBUG | |
24864 | +#ifdef MD_DEBUG | |
24865 | + int j; | |
24866 | + linear_conf_t *conf = mddev_to_conf(mddev); | |
24867 | + | |
24868 | + sz += sprintf(page+sz, " "); | |
24869 | + for (j = 0; j < conf->nr_zones; j++) | |
24870 | + { | |
24871 | + sz += sprintf(page+sz, "[%s", | |
24872 | + partition_name(conf->hash_table[j].dev0->dev)); | |
24873 | + | |
24874 | + if (conf->hash_table[j].dev1) | |
24875 | + sz += sprintf(page+sz, "/%s] ", | |
24876 | + partition_name(conf->hash_table[j].dev1->dev)); | |
24877 | + else | |
24878 | + sz += sprintf(page+sz, "] "); | |
24879 | + } | |
24880 | + sz += sprintf(page+sz, "\n"); | |
24881 | +#endif | |
24882 | + sz += sprintf(page+sz, " %dk rounding", mddev->chunk_size/1024); | |
24883 | + return sz; | |
24884 | +} | |
24885 | + | |
24886 | +static int linear_evms_ioctl ( | |
24887 | + mddev_t * mddev, | |
24888 | + struct inode * inode, | |
24889 | + struct file * file, | |
24890 | + unsigned int cmd, | |
24891 | + unsigned long arg) | |
24892 | +{ | |
24893 | + int rc = 0; | |
24894 | + struct evms_logical_node *node; | |
24895 | + | |
24896 | + switch (cmd) { | |
24897 | + case EVMS_GET_BMAP: | |
24898 | + { | |
24899 | + struct evms_get_bmap_pkt *bmap = (struct evms_get_bmap_pkt *)arg; | |
24900 | + struct buffer_head *bh = | |
24901 | + evms_cs_allocate_from_pool(evms_bh_pool, FALSE); | |
24902 | + if (bh) { | |
24903 | + bh->b_rsector = (unsigned long)bmap->rsector; | |
24904 | + bh->b_size = node->block_size; | |
24905 | + rc = linear_map(mddev, &node, bh); | |
24906 | + if (!rc) { | |
24907 | + bmap->rsector = (u64)bh->b_rsector; | |
24908 | + if (node) | |
24909 | + rc = IOCTL(node, inode, file, cmd, arg); | |
24910 | + else | |
24911 | + rc = -ENODEV; | |
24912 | + } | |
24913 | + evms_cs_deallocate_to_pool(evms_bh_pool, bh); | |
24914 | + } else | |
24915 | + rc = -ENOMEM; | |
24916 | + break; | |
24917 | + } | |
24918 | + | |
24919 | + default: | |
24920 | + rc = -EINVAL; | |
24921 | + } | |
24922 | + return rc; | |
24923 | +} | |
24924 | + | |
24925 | +static mdk_personality_t linear_personality = { | |
24926 | + .name = "evms_linear", | |
24927 | + .read = linear_read, | |
24928 | + .write = linear_write, | |
24929 | + .run = linear_run, | |
24930 | + .stop = linear_stop, | |
24931 | + .status = linear_status, | |
24932 | + .evms_ioctl = linear_evms_ioctl | |
24933 | +}; | |
24934 | + | |
24935 | +static int md__init linear_init (void) | |
24936 | +{ | |
24937 | + return evms_register_md_personality (LINEAR, &linear_personality); | |
24938 | +} | |
24939 | + | |
24940 | +static void linear_exit (void) | |
24941 | +{ | |
24942 | + evms_unregister_md_personality (LINEAR); | |
24943 | +} | |
24944 | + | |
24945 | + | |
24946 | +module_init(linear_init); | |
24947 | +module_exit(linear_exit); | |
24948 | +#ifdef MODULE_LICENSE | |
24949 | +MODULE_LICENSE("GPL"); | |
24950 | +#endif | |
24951 | diff -Naur linux-2002-09-30/drivers/evms/md_raid0.c evms-2002-09-30/drivers/evms/md_raid0.c | |
24952 | --- linux-2002-09-30/drivers/evms/md_raid0.c Wed Dec 31 18:00:00 1969 | |
24953 | +++ evms-2002-09-30/drivers/evms/md_raid0.c Thu Aug 15 13:50:12 2002 | |
24954 | @@ -0,0 +1,448 @@ | |
24955 | +/* | |
24956 | + raid0.c : Multiple Devices driver for Linux | |
24957 | + Copyright (C) 1994-96 Marc ZYNGIER | |
24958 | + <zyngier@ufr-info-p7.ibp.fr> or | |
24959 | + <maz@gloups.fdn.fr> | |
24960 | + Copyright (C) 1999, 2000 Ingo Molnar, Red Hat | |
24961 | + | |
24962 | + | |
24963 | + RAID-0 management functions. | |
24964 | + | |
24965 | + This program is free software; you can redistribute it and/or modify | |
24966 | + it under the terms of the GNU General Public License as published by | |
24967 | + the Free Software Foundation; either version 2, or (at your option) | |
24968 | + any later version. | |
24969 | + | |
24970 | + You should have received a copy of the GNU General Public License | |
24971 | + (for example /usr/src/linux/COPYING); if not, write to the Free | |
24972 | + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24973 | +*/ | |
24974 | + | |
24975 | +#include <linux/module.h> | |
24976 | +#include <linux/evms/evms_raid0.h> | |
24977 | + | |
24978 | +#define MAJOR_NR MD_MAJOR | |
24979 | +#define MD_DRIVER | |
24980 | +#define MD_PERSONALITY | |
24981 | + | |
24982 | +#define LOG_PREFIX "md raid0: " | |
24983 | + | |
24984 | +static int create_strip_zones (mddev_t *mddev) | |
24985 | +{ | |
24986 | + int i, c, j, j1, j2; | |
24987 | + unsigned long current_offset, curr_zone_offset, rdev_size_in_sects; | |
24988 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
24989 | + mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; | |
24990 | + | |
24991 | + /* | |
24992 | + * The number of 'same size groups' | |
24993 | + */ | |
24994 | + conf->nr_strip_zones = 0; | |
24995 | + | |
24996 | + ITERATE_RDEV_ORDERED(mddev,rdev1,j1) { | |
24997 | + LOG_DEBUG(" looking at %s\n", evms_md_partition_name(rdev1->node)); | |
24998 | + c = 0; | |
24999 | + ITERATE_RDEV_ORDERED(mddev,rdev2,j2) { | |
25000 | + LOG_DEBUG(" comparing %s(%ld sectors) with %s(%ld sectors)\n", | |
25001 | + evms_md_partition_name(rdev1->node), rdev1->size << 1, | |
25002 | + evms_md_partition_name(rdev2->node), rdev2->size << 1); | |
25003 | + if (rdev2 == rdev1) { | |
25004 | + LOG_DEBUG(" END\n"); | |
25005 | + break; | |
25006 | + } | |
25007 | + if (rdev2->size == rdev1->size) | |
25008 | + { | |
25009 | + /* | |
25010 | + * Not unique, dont count it as a new | |
25011 | + * group | |
25012 | + */ | |
25013 | + LOG_DEBUG(" EQUAL\n"); | |
25014 | + c = 1; | |
25015 | + break; | |
25016 | + } | |
25017 | + LOG_DEBUG(" NOT EQUAL\n"); | |
25018 | + } | |
25019 | + if (!c) { | |
25020 | + LOG_DEBUG(" ==> UNIQUE\n"); | |
25021 | + conf->nr_strip_zones++; | |
25022 | + LOG_DEBUG(" %d zones\n",conf->nr_strip_zones); | |
25023 | + } | |
25024 | + } | |
25025 | + LOG_DEBUG(" FINAL %d zones\n",conf->nr_strip_zones); | |
25026 | + | |
25027 | + conf->strip_zone = vmalloc(sizeof(struct strip_zone)* | |
25028 | + conf->nr_strip_zones); | |
25029 | + if (!conf->strip_zone) | |
25030 | + return 1; | |
25031 | + | |
25032 | + | |
25033 | + conf->smallest = NULL; | |
25034 | + current_offset = 0; | |
25035 | + curr_zone_offset = 0; | |
25036 | + | |
25037 | + for (i = 0; i < conf->nr_strip_zones; i++) | |
25038 | + { | |
25039 | + struct strip_zone *zone = conf->strip_zone + i; | |
25040 | + | |
25041 | + LOG_DEBUG(" zone %d\n", i); | |
25042 | + zone->dev_offset = current_offset; | |
25043 | + smallest = NULL; | |
25044 | + c = 0; | |
25045 | + | |
25046 | + ITERATE_RDEV_ORDERED(mddev,rdev,j) { | |
25047 | + | |
25048 | + LOG_DEBUG(" checking %s ...",evms_md_partition_name(rdev->node)); | |
25049 | + rdev_size_in_sects = rdev->size << 1; | |
25050 | + if (rdev_size_in_sects > current_offset) | |
25051 | + { | |
25052 | + LOG_DEBUG(" contained as device %d\n", c); | |
25053 | + zone->node[c] = rdev->node; | |
25054 | + c++; | |
25055 | + if (!smallest || (rdev_size_in_sects < (smallest->size <<1) )) { | |
25056 | + smallest = rdev; | |
25057 | + LOG_DEBUG(" (%ld) is smallest!.\n", rdev_size_in_sects); | |
25058 | + } | |
25059 | + } else | |
25060 | + LOG_DEBUG(" nope.\n"); | |
25061 | + } | |
25062 | + | |
25063 | + zone->nb_dev = c; | |
25064 | + zone->size_in_sects = ((smallest->size <<1) - current_offset) * c; | |
25065 | + LOG_DEBUG(" zone->nb_dev: %d, size: %ld\n", | |
25066 | + zone->nb_dev,zone->size_in_sects); | |
25067 | + | |
25068 | + if (!conf->smallest || (zone->size_in_sects < conf->smallest->size_in_sects)) | |
25069 | + conf->smallest = zone; | |
25070 | + | |
25071 | + zone->zone_offset = curr_zone_offset; | |
25072 | + curr_zone_offset += zone->size_in_sects; | |
25073 | + | |
25074 | + current_offset = smallest->size << 1; | |
25075 | + LOG_DEBUG(" current zone offset: %ld\n",current_offset); | |
25076 | + } | |
25077 | + LOG_DEBUG(" done.\n"); | |
25078 | + return 0; | |
25079 | +} | |
25080 | + | |
25081 | +static int raid0_run (mddev_t *mddev) | |
25082 | +{ | |
25083 | + unsigned long cur=0, i=0, size, zone0_size, nb_zone; | |
25084 | + unsigned long mddev_size_in_sects = evms_md_size[mdidx(mddev)] << 1; | |
25085 | + raid0_conf_t *conf; | |
25086 | + | |
25087 | + MOD_INC_USE_COUNT; | |
25088 | + | |
25089 | + conf = vmalloc(sizeof (raid0_conf_t)); | |
25090 | + if (!conf) | |
25091 | + goto out; | |
25092 | + mddev->private = (void *)conf; | |
25093 | + | |
25094 | + if (evms_md_check_ordering(mddev)) { | |
25095 | + LOG_ERROR("disks are not ordered, aborting!\n"); | |
25096 | + goto out_free_conf; | |
25097 | + } | |
25098 | + | |
25099 | + if (create_strip_zones (mddev)) | |
25100 | + goto out_free_conf; | |
25101 | + | |
25102 | + LOG_DETAILS("evms_md_size is %ld sectors.\n", mddev_size_in_sects); | |
25103 | + LOG_DETAILS("conf->smallest->size_in_sects is %ld sectors.\n", conf->smallest->size_in_sects); | |
25104 | + nb_zone = mddev_size_in_sects / conf->smallest->size_in_sects + | |
25105 | + (mddev_size_in_sects % conf->smallest->size_in_sects ? 1 : 0); | |
25106 | + LOG_DETAILS("nb_zone is %ld.\n", nb_zone); | |
25107 | + conf->nr_zones = nb_zone; | |
25108 | + | |
25109 | + LOG_DEBUG("Allocating %ld bytes for hash.\n", nb_zone*sizeof(struct raid0_hash)); | |
25110 | + | |
25111 | + conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone); | |
25112 | + if (!conf->hash_table) | |
25113 | + goto out_free_zone_conf; | |
25114 | + size = conf->strip_zone[cur].size_in_sects; | |
25115 | + | |
25116 | + i = 0; | |
25117 | + while (cur < conf->nr_strip_zones) { | |
25118 | + conf->hash_table[i].zone0 = conf->strip_zone + cur; | |
25119 | + | |
25120 | + /* | |
25121 | + * If we completely fill the slot | |
25122 | + */ | |
25123 | + if (size >= conf->smallest->size_in_sects) { | |
25124 | + conf->hash_table[i++].zone1 = NULL; | |
25125 | + size -= conf->smallest->size_in_sects; | |
25126 | + | |
25127 | + if (!size) { | |
25128 | + if (++cur == conf->nr_strip_zones) | |
25129 | + continue; | |
25130 | + size = conf->strip_zone[cur].size_in_sects; | |
25131 | + } | |
25132 | + continue; | |
25133 | + } | |
25134 | + if (++cur == conf->nr_strip_zones) { | |
25135 | + /* | |
25136 | + * Last dev, set unit1 as NULL | |
25137 | + */ | |
25138 | + conf->hash_table[i].zone1=NULL; | |
25139 | + continue; | |
25140 | + } | |
25141 | + | |
25142 | + /* | |
25143 | + * Here we use a 2nd dev to fill the slot | |
25144 | + */ | |
25145 | + zone0_size = size; | |
25146 | + size = conf->strip_zone[cur].size_in_sects; | |
25147 | + conf->hash_table[i++].zone1 = conf->strip_zone + cur; | |
25148 | + size -= (conf->smallest->size_in_sects - zone0_size); | |
25149 | + } | |
25150 | + return 0; | |
25151 | + | |
25152 | +out_free_zone_conf: | |
25153 | + vfree(conf->strip_zone); | |
25154 | + conf->strip_zone = NULL; | |
25155 | + | |
25156 | +out_free_conf: | |
25157 | + vfree(conf); | |
25158 | + mddev->private = NULL; | |
25159 | +out: | |
25160 | + MOD_DEC_USE_COUNT; | |
25161 | + return 1; | |
25162 | +} | |
25163 | + | |
25164 | +static int raid0_stop (mddev_t *mddev) | |
25165 | +{ | |
25166 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
25167 | + | |
25168 | + vfree (conf->hash_table); | |
25169 | + conf->hash_table = NULL; | |
25170 | + vfree (conf->strip_zone); | |
25171 | + conf->strip_zone = NULL; | |
25172 | + vfree (conf); | |
25173 | + mddev->private = NULL; | |
25174 | + | |
25175 | + MOD_DEC_USE_COUNT; | |
25176 | + return 0; | |
25177 | +} | |
25178 | + | |
25179 | + | |
25180 | +/* | |
25181 | + * Function: raid0_map | |
25182 | + * | |
25183 | + * Return 0 for success, else error | |
25184 | + * | |
25185 | + */ | |
25186 | + | |
25187 | +static inline int raid0_map( | |
25188 | + mddev_t *mddev, | |
25189 | + unsigned long lsn, | |
25190 | + unsigned long size, | |
25191 | + struct evms_logical_node **node, | |
25192 | + unsigned long *new_lsn, | |
25193 | + unsigned long *new_size) | |
25194 | +{ | |
25195 | + unsigned int sect_in_chunk, chunksize_bits, chunk_size_in_sects; | |
25196 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
25197 | + struct raid0_hash *hash; | |
25198 | + struct strip_zone *zone; | |
25199 | + unsigned long chunk; | |
25200 | + | |
25201 | + chunk_size_in_sects = mddev->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
25202 | + chunksize_bits = ffz(~chunk_size_in_sects); | |
25203 | + hash = conf->hash_table + (lsn / conf->smallest->size_in_sects); | |
25204 | + | |
25205 | + /* Sanity check */ | |
25206 | + if (!hash) | |
25207 | + goto bad_hash; | |
25208 | + | |
25209 | + if (!hash->zone0) | |
25210 | + goto bad_zone0; | |
25211 | + | |
25212 | + if (lsn >= (hash->zone0->size_in_sects + hash->zone0->zone_offset)) { | |
25213 | + if (!hash->zone1) | |
25214 | + goto bad_zone1; | |
25215 | + zone = hash->zone1; | |
25216 | + } else | |
25217 | + zone = hash->zone0; | |
25218 | + | |
25219 | + sect_in_chunk = lsn & (chunk_size_in_sects - 1); | |
25220 | + chunk = (lsn - zone->zone_offset) / (zone->nb_dev << chunksize_bits); | |
25221 | + *node = zone->node[(lsn >> chunksize_bits) % zone->nb_dev]; | |
25222 | + | |
25223 | + *new_lsn = ((chunk << chunksize_bits) + zone->dev_offset) + sect_in_chunk; | |
25224 | + | |
25225 | + *new_size = (size <= chunk_size_in_sects - sect_in_chunk) ? | |
25226 | + size : chunk_size_in_sects - sect_in_chunk; | |
25227 | + | |
25228 | + return 0; | |
25229 | + | |
25230 | +bad_hash: | |
25231 | + LOG_ERROR("%s: bug: hash==NULL for lsn %lu\n", __FUNCTION__, lsn); | |
25232 | + goto outerr; | |
25233 | +bad_zone0: | |
25234 | + LOG_ERROR("%s: bug: hash->zone0==NULL for lsn %lu\n", __FUNCTION__, lsn); | |
25235 | + goto outerr; | |
25236 | +bad_zone1: | |
25237 | + LOG_ERROR("%s: bug: hash->zone1==NULL for lsn %lu\n", __FUNCTION__, lsn); | |
25238 | +outerr: | |
25239 | + return -EINVAL; | |
25240 | +} | |
25241 | + | |
25242 | +void raid0_error(int rw, struct evms_logical_node *node, struct buffer_head *bh) | |
25243 | +{ | |
25244 | + LOG_ERROR(" %s FAILED on node(%s) rsector(%lu) size(%d)\n", | |
25245 | + (rw == READ) ? "READ" : "WRITE", | |
25246 | + node->name, | |
25247 | + bh->b_rsector, | |
25248 | + bh->b_size); | |
25249 | + | |
25250 | + bh->b_end_io(bh, 0); | |
25251 | +} | |
25252 | + | |
25253 | +static inline void raid0_rw ( | |
25254 | + struct evms_logical_node *md_node, | |
25255 | + struct buffer_head *bh, | |
25256 | + int rw) | |
25257 | +{ | |
25258 | + mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node); | |
25259 | + struct evms_logical_node *node; | |
25260 | + unsigned long new_lsn, size_in_sects, new_size; | |
25261 | + | |
25262 | + if (evms_md_check_boundary(md_node, bh)) return; | |
25263 | + size_in_sects = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT; | |
25264 | + if (!raid0_map(mddev, bh->b_rsector, size_in_sects, &node, &new_lsn, &new_size)) { | |
25265 | + if (new_size == size_in_sects) { | |
25266 | + /* | |
25267 | + * This is the normal case: | |
25268 | + * the request is entirely within the stripe boundary | |
25269 | + */ | |
25270 | + bh->b_rsector = new_lsn; | |
25271 | + if (rw == READ) { | |
25272 | + R_IO(node, bh); | |
25273 | + } else { | |
25274 | + W_IO(node, bh); | |
25275 | + } | |
25276 | + return; | |
25277 | + } else { | |
25278 | + /* | |
25279 | + * BUGBUG! | |
25280 | + * Need more processing here (ie. break up the request) | |
25281 | + */ | |
25282 | + LOG_ERROR("This version of EVMS RAID0 does not support I/O requests that are:\n"); | |
25283 | + LOG_ERROR(" - larger than the stripe size\n"); | |
25284 | + LOG_ERROR(" - cross the stripe boundary\n"); | |
25285 | + } | |
25286 | + } | |
25287 | + raid0_error(rw, node, bh); | |
25288 | +} | |
25289 | + | |
25290 | +static void raid0_read( | |
25291 | + struct evms_logical_node *md_node, | |
25292 | + struct buffer_head *bh) | |
25293 | +{ | |
25294 | + raid0_rw(md_node, bh, READ); | |
25295 | +} | |
25296 | + | |
25297 | +static void raid0_write( | |
25298 | + struct evms_logical_node *md_node, | |
25299 | + struct buffer_head *bh) | |
25300 | +{ | |
25301 | + raid0_rw(md_node, bh, WRITE); | |
25302 | +} | |
25303 | + /* raid0_status: formats status text into page with sprintf, starting at offset 0, and returns the number of characters written. */ | |
25304 | +static int raid0_status (char *page, mddev_t *mddev) | |
25305 | +{ | |
25306 | + int sz = 0; | |
25307 | +#undef MD_DEBUG /* forces the zone dump below to be compiled out */ | |
25308 | +#ifdef MD_DEBUG | |
25309 | + int j, k; | |
25310 | + raid0_conf_t *conf = mddev_to_conf(mddev); | |
25311 | + | |
25312 | + sz += sprintf(page + sz, " "); | |
25313 | + for (j = 0; j < conf->nr_zones; j++) { /* one entry per hash table slot: zone0 and, when set, zone1 as indexes into strip_zone */ | |
25314 | + sz += sprintf(page + sz, "[z%d", | |
25315 | + conf->hash_table[j].zone0 - conf->strip_zone); | |
25316 | + if (conf->hash_table[j].zone1) | |
25317 | + sz += sprintf(page+sz, "/z%d] ", | |
25318 | + conf->hash_table[j].zone1 - conf->strip_zone); | |
25319 | + else | |
25320 | + sz += sprintf(page+sz, "] "); | |
25321 | + } | |
25322 | + | |
25323 | + sz += sprintf(page + sz, "\n"); | |
25324 | + | |
25325 | + for (j = 0; j < conf->nr_strip_zones; j++) { /* one line per strip zone: member node names, then offsets and size */ | |
25326 | + sz += sprintf(page + sz, " z%d=[", j); | |
25327 | + for (k = 0; k < conf->strip_zone[j].nb_dev; k++) | |
25328 | + sz += sprintf (page+sz, "%s/", conf->strip_zone[j].node[k]->name); | |
25329 | + sz--; /* step back one character so the next sprintf overwrites the trailing '/' */ | |
25330 | + sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n", | |
25331 | + conf->strip_zone[j].zone_offset, | |
25332 | + conf->strip_zone[j].dev_offset, | |
25333 | + conf->strip_zone[j].size_in_sects); | |
25334 | + } | |
25335 | +#endif | |
25336 | + sz += sprintf(page + sz, " %dk chunks", mddev->chunk_size/1024); /* chunk_size divided by 1024 and labelled "k"; presumably chunk_size is in bytes - TODO confirm */ | |
25337 | + return sz; | |
25338 | +} | |
25339 | + /* raid0_evms_ioctl: handles EVMS_GET_BMAP by remapping bmap->rsector through raid0_map() and forwarding the ioctl to the mapped node; any other cmd returns -EINVAL. */ | |
25340 | +static int raid0_evms_ioctl ( | |
25341 | + mddev_t * mddev, | |
25342 | + struct inode * inode, | |
25343 | + struct file * file, | |
25344 | + unsigned int cmd, | |
25345 | + unsigned long arg) | |
25346 | +{ | |
25347 | + int rc = 0; | |
25348 | + struct evms_logical_node *node; /* filled in by raid0_map(); has no initializer */ | |
25349 | + | |
25350 | + switch (cmd) { | |
25351 | + case EVMS_GET_BMAP: | |
25352 | + { | |
25353 | + struct evms_get_bmap_pkt *bmap = (struct evms_get_bmap_pkt *)arg; /* arg is dereferenced directly; presumably already a kernel-space pointer - TODO confirm */ | |
25354 | + unsigned long new_lsn, new_size; | |
25355 | + unsigned long size = mddev->node->block_size >> EVMS_VSECTOR_SIZE_SHIFT; /* length passed to raid0_map(): the MD node block_size shifted down by the vsector shift */ | |
25356 | + rc = raid0_map(mddev, | |
25357 | + (unsigned long)bmap->rsector, | |
25358 | + size, | |
25359 | + &node, | |
25360 | + &new_lsn, | |
25361 | + &new_size); | |
25362 | + if (!rc) { | |
25363 | + if (node) { /* NOTE(review): relies on raid0_map() always storing node when it returns 0 - verify */ | |
25364 | + bmap->rsector = (u64)new_lsn; /* rewrite the packet in place so the child sees the remapped sector */ | |
25365 | + rc = IOCTL(node, inode, file, cmd, arg); | |
25366 | + } else | |
25367 | + rc = -ENODEV; | |
25368 | + } | |
25369 | + break; | |
25370 | + } | |
25371 | + | |
25372 | + default: | |
25373 | + rc = -EINVAL; | |
25374 | + } | |
25375 | + return rc; | |
25376 | +} | |
25377 | + /* raid0_personality: operations table for this personality; raid0_init() passes it to evms_register_md_personality(). */ | |
25378 | +static mdk_personality_t raid0_personality = { | |
25379 | + .name = "evms_raid0", | |
25380 | + .read = raid0_read, | |
25381 | + .write = raid0_write, | |
25382 | + .run = raid0_run, | |
25383 | + .stop = raid0_stop, | |
25384 | + .status = raid0_status, | |
25385 | + .evms_ioctl = raid0_evms_ioctl | |
25386 | +}; | |
25387 | + /* raid0_init: module init hook; registers raid0_personality under RAID0 and returns the registration result. */ | |
25388 | +static int md__init raid0_init (void) | |
25389 | +{ | |
25390 | + return evms_register_md_personality (RAID0, &raid0_personality); | |
25391 | +} | |
25392 | + /* raid0_exit: module exit hook; unregisters the RAID0 personality. */ | |
25393 | +static void raid0_exit (void) | |
25394 | +{ | |
25395 | + evms_unregister_md_personality (RAID0); | |
25396 | +} | |
25397 | + /* Module entry and exit hooks; the license tag is emitted only when MODULE_LICENSE is defined. */ | |
25398 | +module_init(raid0_init); | |
25399 | +module_exit(raid0_exit); | |
25400 | +#ifdef MODULE_LICENSE | |
25401 | +MODULE_LICENSE("GPL"); | |
25402 | +#endif | |
25403 | diff -Naur linux-2002-09-30/drivers/evms/md_raid1.c evms-2002-09-30/drivers/evms/md_raid1.c | |
25404 | --- linux-2002-09-30/drivers/evms/md_raid1.c Wed Dec 31 18:00:00 1969 | |
25405 | +++ evms-2002-09-30/drivers/evms/md_raid1.c Mon Sep 30 00:02:48 2002 | |
25406 | @@ -0,0 +1,1935 @@ | |
25407 | +/* | |
25408 | + * md_raid1.c : Multiple Devices driver for Linux | |
25409 | + * | |
25410 | + * Copyright (C) 1999, 2000 Ingo Molnar, Red Hat | |
25411 | + * | |
25412 | + * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman | |
25413 | + * | |
25414 | + * RAID-1 management functions. | |
25415 | + * | |
25416 | + * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000 | |
25417 | + * | |
25418 |