]>
Commit | Line | Data |
---|---|---|
3abc4eb0 AM |
1 | =================================================================== |
2 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6.h,v | |
3 | retrieving revision 1.2 | |
4 | diff -u -r1.2 raid6.h | |
5 | --- linux-2.5/drivers/md/raid6.h 21 Jan 2004 03:11:33 -0000 1.2 | |
6 | +++ linux-2.5/drivers/md/raid6.h 21 Jan 2004 16:29:56 -0000 | |
7 | @@ -63,6 +63,7 @@ | |
8 | ||
9 | #define __init | |
10 | #define __exit | |
11 | +#define __attribute_const__ __attribute__((const)) | |
12 | ||
13 | #define preempt_enable() | |
14 | #define preempt_disable() | |
15 | =================================================================== | |
16 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6algos.c,v | |
17 | retrieving revision 1.2 | |
18 | diff -u -r1.2 raid6algos.c | |
19 | --- linux-2.5/drivers/md/raid6algos.c 21 Jan 2004 03:11:33 -0000 1.2 | |
20 | +++ linux-2.5/drivers/md/raid6algos.c 21 Jan 2004 16:42:19 -0000 | |
21 | @@ -46,7 +46,7 @@ | |
22 | &raid6_intx16, | |
23 | &raid6_intx32, | |
24 | #endif | |
25 | -#if defined(__i386__) || defined(__x86_64__) | |
26 | +#if defined(__i386__) | |
27 | &raid6_mmxx1, | |
28 | &raid6_mmxx2, | |
29 | &raid6_sse1x1, | |
30 | @@ -55,6 +55,8 @@ | |
31 | &raid6_sse2x2, | |
32 | #endif | |
33 | #if defined(__x86_64__) | |
34 | + &raid6_sse2x1, | |
35 | + &raid6_sse2x2, | |
36 | &raid6_sse2x4, | |
37 | #endif | |
38 | NULL | |
39 | =================================================================== | |
40 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6int.uc,v | |
41 | retrieving revision 1.2 | |
42 | diff -u -r1.2 raid6int.uc | |
43 | --- linux-2.5/drivers/md/raid6int.uc 21 Jan 2004 03:11:33 -0000 1.2 | |
44 | +++ linux-2.5/drivers/md/raid6int.uc 21 Jan 2004 16:34:17 -0000 | |
45 | @@ -1,6 +1,6 @@ | |
46 | /* -*- linux-c -*- ------------------------------------------------------- * | |
47 | * | |
48 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
49 | + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
50 | * | |
51 | * This program is free software; you can redistribute it and/or modify | |
52 | * it under the terms of the GNU General Public License as published by | |
53 | @@ -18,14 +18,46 @@ | |
54 | * This file is postprocessed using unroller.pl | |
55 | */ | |
56 | ||
57 | +#include <linux/compiler.h> | |
58 | #include "raid6.h" | |
59 | ||
60 | /* | |
61 | * IA-64 wants insane amounts of unrolling. On other architectures that | |
62 | * is just a waste of space. | |
63 | */ | |
64 | +#if ($# <= 8) || defined(__ia64__) | |
65 | ||
66 | -#if ($# <= 8) || defined(_ia64__) | |
67 | + | |
68 | +/* | |
69 | + * These sub-operations are separate inlines since they can sometimes be | |
70 | + * specially optimized using architecture-specific hacks. | |
71 | + */ | |
72 | + | |
73 | +/* | |
74 | + * The SHLBYTE() operation shifts each byte left by 1, *not* | |
75 | + * rolling over into the next byte | |
76 | + */ | |
77 | +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) | |
78 | +{ | |
79 | + unative_t vv; | |
80 | + | |
81 | + vv = (v << 1) & NBYTES(0xfe); | |
82 | + return vv; | |
83 | +} | |
84 | + | |
85 | +/* | |
86 | + * The MASK() operation returns 0xFF in any byte for which the high | |
87 | + * bit is 1, 0x00 for any byte for which the high bit is 0. | |
88 | + */ | |
89 | +static inline __attribute_const__ unative_t MASK(unative_t v) | |
90 | +{ | |
91 | + unative_t vv; | |
92 | + | |
93 | + vv = v & NBYTES(0x80); | |
94 | + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ | |
95 | + return vv; | |
96 | +} | |
97 | + | |
98 | ||
99 | static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | |
100 | { | |
101 | @@ -44,9 +76,8 @@ | |
102 | for ( z = z0-1 ; z >= 0 ; z-- ) { | |
103 | wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; | |
104 | wp$$ ^= wd$$; | |
105 | - w2$$ = wq$$ & NBYTES(0x80); | |
106 | - w1$$ = (wq$$ << 1) & NBYTES(0xfe); | |
107 | - w2$$ = (w2$$ << 1) - (w2$$ >> 7); | |
108 | + w2$$ = MASK(wq$$); | |
109 | + w1$$ = SHLBYTE(wq$$); | |
110 | w2$$ &= NBYTES(0x1d); | |
111 | w1$$ ^= w2$$; | |
112 | wq$$ = w1$$ ^ wd$$; | |
113 | =================================================================== | |
114 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6mmx.c,v | |
115 | retrieving revision 1.2 | |
116 | diff -u -r1.2 raid6mmx.c | |
117 | --- linux-2.5/drivers/md/raid6mmx.c 21 Jan 2004 03:11:33 -0000 1.2 | |
118 | +++ linux-2.5/drivers/md/raid6mmx.c 21 Jan 2004 16:41:40 -0000 | |
119 | @@ -16,7 +16,7 @@ | |
120 | * MMX implementation of RAID-6 syndrome functions | |
121 | */ | |
122 | ||
123 | -#if defined(__i386__) || defined(__x86_64__) | |
124 | +#if defined(__i386__) | |
125 | ||
126 | #include "raid6.h" | |
127 | #include "raid6x86.h" | |
128 | =================================================================== | |
129 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6recov.c,v | |
130 | retrieving revision 1.2 | |
131 | diff -u -r1.2 raid6recov.c | |
132 | --- linux-2.5/drivers/md/raid6recov.c 21 Jan 2004 03:11:33 -0000 1.2 | |
133 | +++ linux-2.5/drivers/md/raid6recov.c 21 Jan 2004 16:31:58 -0000 | |
134 | @@ -117,7 +117,7 @@ | |
135 | } else { | |
136 | /* data+Q failure. Reconstruct data from P, | |
137 | then rebuild syndrome. */ | |
138 | - /* FIX */ | |
139 | + /* NOT IMPLEMENTED - equivalent to RAID-5 */ | |
140 | } | |
141 | } else { | |
142 | if ( failb == disks-2 ) { | |
143 | =================================================================== | |
144 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6sse1.c,v | |
145 | retrieving revision 1.2 | |
146 | diff -u -r1.2 raid6sse1.c | |
147 | --- linux-2.5/drivers/md/raid6sse1.c 21 Jan 2004 03:11:33 -0000 1.2 | |
148 | +++ linux-2.5/drivers/md/raid6sse1.c 21 Jan 2004 16:41:48 -0000 | |
149 | @@ -21,7 +21,7 @@ | |
150 | * worthwhile as a separate implementation. | |
151 | */ | |
152 | ||
153 | -#if defined(__i386__) || defined(__x86_64__) | |
154 | +#if defined(__i386__) | |
155 | ||
156 | #include "raid6.h" | |
157 | #include "raid6x86.h" | |
158 | =================================================================== | |
159 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6x86.h,v | |
160 | retrieving revision 1.2 | |
161 | diff -u -r1.2 raid6x86.h | |
162 | --- linux-2.5/drivers/md/raid6x86.h 21 Jan 2004 03:11:33 -0000 1.2 | |
163 | +++ linux-2.5/drivers/md/raid6x86.h 21 Jan 2004 19:50:50 -0000 | |
164 | @@ -1,7 +1,7 @@ | |
165 | #ident "$Id$" | |
166 | /* ----------------------------------------------------------------------- * | |
167 | * | |
168 | - * Copyright 2002 H. Peter Anvin - All Rights Reserved | |
169 | + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved | |
170 | * | |
171 | * This program is free software; you can redistribute it and/or modify | |
172 | * it under the terms of the GNU General Public License as published by | |
173 | @@ -22,54 +22,75 @@ | |
174 | ||
175 | #if defined(__i386__) || defined(__x86_64__) | |
176 | ||
177 | +#ifdef __x86_64__ | |
178 | + | |
179 | typedef struct { | |
180 | unsigned int fsave[27]; | |
181 | - unsigned int cr0; | |
182 | -} raid6_mmx_save_t; | |
183 | + unsigned long cr0; | |
184 | +} raid6_mmx_save_t __attribute__((aligned(16))); | |
185 | ||
186 | /* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since | |
187 | the code doesn't know about the additional x86-64 registers */ | |
188 | -/* The +3 is so we can make sure the area is aligned properly */ | |
189 | typedef struct { | |
190 | - unsigned int sarea[8*4+3]; | |
191 | + unsigned int sarea[8*4]; | |
192 | unsigned int cr0; | |
193 | } raid6_sse_save_t __attribute__((aligned(16))); | |
194 | ||
195 | -#ifdef __x86_64__ | |
196 | - | |
197 | /* This is for x86-64-specific code which uses all 16 XMM registers */ | |
198 | typedef struct { | |
199 | - unsigned int sarea[16*4+3]; | |
200 | - unsigned int cr0; | |
201 | + unsigned int sarea[16*4]; | |
202 | + unsigned long cr0; | |
203 | } raid6_sse16_save_t __attribute__((aligned(16))); | |
204 | ||
205 | +/* On x86-64 the stack is 16-byte aligned */ | |
206 | +#define SAREA(x) (x->sarea) | |
207 | + | |
208 | +#else /* __i386__ */ | |
209 | + | |
210 | +typedef struct { | |
211 | + unsigned int fsave[27]; | |
212 | + unsigned long cr0; | |
213 | +} raid6_mmx_save_t; | |
214 | + | |
215 | +/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte | |
216 | + alignment. The +3 adds three spare words (12 bytes) of slack so that | |
217 | + SAREA() can round the area up to a 16-byte boundary and still fit. */ | |
218 | +typedef struct { | |
219 | + unsigned int sarea[8*4+3]; | |
220 | + unsigned long cr0; | |
221 | +} raid6_sse_save_t; | |
222 | + | |
223 | +#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15)) | |
224 | + | |
225 | #endif | |
226 | ||
227 | #ifdef __KERNEL__ /* Real code */ | |
228 | ||
229 | - static inline u32 raid6_get_fpu(void) | |
230 | +/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */ | |
231 | + | |
232 | +static inline unsigned long raid6_get_fpu(void) | |
233 | { | |
234 | - u32 cr0; | |
235 | + unsigned long cr0; | |
236 | ||
237 | preempt_disable(); | |
238 | - asm volatile("movl %%cr0,%0 ; clts" : "=r" (cr0)); | |
239 | + asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0)); | |
240 | return cr0; | |
241 | } | |
242 | ||
243 | -static inline void raid6_put_fpu(u32 cr0) | |
244 | +static inline void raid6_put_fpu(unsigned long cr0) | |
245 | { | |
246 | - asm volatile("movl %0,%%cr0" : : "r" (cr0)); | |
247 | + asm volatile("mov %0,%%cr0" : : "r" (cr0)); | |
248 | preempt_enable(); | |
249 | } | |
250 | ||
251 | #else /* Dummy code for user space testing */ | |
252 | ||
253 | -static inline u32 raid6_get_fpu(void) | |
254 | +static inline unsigned long raid6_get_fpu(void) | |
255 | { | |
256 | return 0xf00ba6; | |
257 | } | |
258 | ||
259 | -static inline void raid6_put_fpu(u32 cr0) | |
260 | +static inline void raid6_put_fpu(unsigned long cr0) | |
261 | { | |
262 | (void)cr0; | |
263 | } | |
264 | @@ -90,13 +111,8 @@ | |
265 | ||
266 | static inline void raid6_before_sse(raid6_sse_save_t *s) | |
267 | { | |
268 | -#ifdef __x86_64__ | |
269 | - unsigned int *rsa = s->sarea; | |
270 | -#else | |
271 | - /* On i386 the save area may not be aligned */ | |
272 | - unsigned int *rsa = | |
273 | - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); | |
274 | -#endif | |
275 | + unsigned int *rsa = SAREA(s); | |
276 | + | |
277 | s->cr0 = raid6_get_fpu(); | |
278 | ||
279 | asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0])); | |
280 | @@ -111,13 +127,8 @@ | |
281 | ||
282 | static inline void raid6_after_sse(raid6_sse_save_t *s) | |
283 | { | |
284 | -#ifdef __x86_64__ | |
285 | - unsigned int *rsa = s->sarea; | |
286 | -#else | |
287 | - /* On i386 the save area may not be aligned */ | |
288 | - unsigned int *rsa = | |
289 | - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); | |
290 | -#endif | |
291 | + unsigned int *rsa = SAREA(s); | |
292 | + | |
293 | asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0])); | |
294 | asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4])); | |
295 | asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8])); | |
296 | @@ -132,13 +143,8 @@ | |
297 | ||
298 | static inline void raid6_before_sse2(raid6_sse_save_t *s) | |
299 | { | |
300 | -#ifdef __x86_64__ | |
301 | - unsigned int *rsa = &s->sarea; | |
302 | -#else | |
303 | - /* On i386 the save area may not be aligned */ | |
304 | - unsigned int *rsa = | |
305 | - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); | |
306 | -#endif | |
307 | + unsigned int *rsa = SAREA(s); | |
308 | + | |
309 | s->cr0 = raid6_get_fpu(); | |
310 | ||
311 | asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0])); | |
312 | @@ -153,13 +159,8 @@ | |
313 | ||
314 | static inline void raid6_after_sse2(raid6_sse_save_t *s) | |
315 | { | |
316 | -#ifdef __x86_64__ | |
317 | - unsigned int *rsa = s->sarea; | |
318 | -#else | |
319 | - /* On i386 the save area may not be aligned */ | |
320 | - unsigned int *rsa = | |
321 | - (unsigned int *)((((unsigned long)&s->sarea)+15) & ~15); | |
322 | -#endif | |
323 | + unsigned int *rsa = SAREA(s); | |
324 | + | |
325 | asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); | |
326 | asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); | |
327 | asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8])); | |
328 | @@ -174,9 +175,9 @@ | |
329 | ||
330 | #ifdef __x86_64__ | |
331 | ||
332 | -static inline raid6_before_sse16(raid6_sse16_save_t *s) | |
333 | +static inline void raid6_before_sse16(raid6_sse16_save_t *s) | |
334 | { | |
335 | - unsigned int *rsa = s->sarea; | |
336 | + unsigned int *rsa = SAREA(s); | |
337 | ||
338 | s->cr0 = raid6_get_fpu(); | |
339 | ||
340 | @@ -198,9 +199,9 @@ | |
341 | asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60])); | |
342 | } | |
343 | ||
344 | -static inline raid6_after_sse16(raid6_sse16_save_t *s) | |
345 | +static inline void raid6_after_sse16(raid6_sse16_save_t *s) | |
346 | { | |
347 | - unsigned int *rsa = s->sarea; | |
348 | + unsigned int *rsa = SAREA(s); | |
349 | ||
350 | asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0])); | |
351 | asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4])); | |
352 | =================================================================== | |
353 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6test/Makefile,v | |
354 | retrieving revision 1.2 | |
355 | diff -u -r1.2 Makefile | |
356 | --- linux-2.5/drivers/md/raid6test/Makefile 21 Jan 2004 03:11:33 -0000 1.2 | |
357 | +++ linux-2.5/drivers/md/raid6test/Makefile 21 Jan 2004 16:30:25 -0000 | |
358 | @@ -17,12 +17,10 @@ | |
359 | %.uc: ../%.uc | |
360 | cp -f $< $@ | |
361 | ||
362 | -%.pl: ../%.pl | |
363 | - cp -f $< $@ | |
364 | - | |
365 | all: raid6.o raid6test | |
366 | ||
367 | raid6.o: raid6int1.o raid6int2.o raid6int4.o raid6int8.o raid6int16.o \ | |
368 | + raid6int32.o \ | |
369 | raid6mmx.o raid6sse1.o raid6sse2.o \ | |
370 | raid6recov.o raid6algos.o \ | |
371 | raid6tables.o | |
372 | @@ -31,26 +29,29 @@ | |
373 | raid6test: raid6.o test.c | |
374 | $(CC) $(CFLAGS) -o raid6test $^ | |
375 | ||
376 | -raid6int1.c: raid6int.uc unroller.pl | |
377 | - $(PERL) ./unroller.pl 1 < raid6int.uc > $@ | |
378 | +raid6int1.c: raid6int.uc ../unroll.pl | |
379 | + $(PERL) ../unroll.pl 1 < raid6int.uc > $@ | |
380 | + | |
381 | +raid6int2.c: raid6int.uc ../unroll.pl | |
382 | + $(PERL) ../unroll.pl 2 < raid6int.uc > $@ | |
383 | ||
384 | -raid6int2.c: raid6int.uc unroller.pl | |
385 | - $(PERL) ./unroller.pl 2 < raid6int.uc > $@ | |
386 | +raid6int4.c: raid6int.uc ../unroll.pl | |
387 | + $(PERL) ../unroll.pl 4 < raid6int.uc > $@ | |
388 | ||
389 | -raid6int4.c: raid6int.uc unroller.pl | |
390 | - $(PERL) ./unroller.pl 4 < raid6int.uc > $@ | |
391 | +raid6int8.c: raid6int.uc ../unroll.pl | |
392 | + $(PERL) ../unroll.pl 8 < raid6int.uc > $@ | |
393 | ||
394 | -raid6int8.c: raid6int.uc unroller.pl | |
395 | - $(PERL) ./unroller.pl 8 < raid6int.uc > $@ | |
396 | +raid6int16.c: raid6int.uc ../unroll.pl | |
397 | + $(PERL) ../unroll.pl 16 < raid6int.uc > $@ | |
398 | ||
399 | -raid6int16.c: raid6int.uc unroller.pl | |
400 | - $(PERL) ./unroller.pl 16 < raid6int.uc > $@ | |
401 | +raid6int32.c: raid6int.uc ../unroll.pl | |
402 | + $(PERL) ../unroll.pl 32 < raid6int.uc > $@ | |
403 | ||
404 | raid6tables.c: mktables | |
405 | ./mktables > raid6tables.c | |
406 | ||
407 | clean: | |
408 | - rm -f *.o mktables mktables.c raid6int.uc raid6*.c raid6test | |
409 | + rm -f *.o mktables mktables.c raid6int.uc raid6*.c raid6test unroll.pl | |
410 | ||
411 | spotless: clean | |
412 | rm -f *~ | |
413 | =================================================================== | |
414 | RCS file: /home/hpa/kernel/bkcvs/linux-2.5/drivers/md/raid6test/test.c,v | |
415 | retrieving revision 1.2 | |
416 | diff -u -r1.2 test.c | |
417 | --- linux-2.5/drivers/md/raid6test/test.c 21 Jan 2004 03:11:33 -0000 1.2 | |
418 | +++ linux-2.5/drivers/md/raid6test/test.c 21 Jan 2004 16:30:42 -0000 | |
419 | @@ -73,14 +73,19 @@ | |
420 | erra = memcmp(data[i], recovi, PAGE_SIZE); | |
421 | errb = memcmp(data[j], recovj, PAGE_SIZE); | |
422 | ||
423 | - printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | |
424 | - raid6_call.name, | |
425 | - i, (i==NDISKS-2)?'P':'D', | |
426 | - j, (j==NDISKS-1)?'Q':(j==NDISKS-2)?'P':'D', | |
427 | - (!erra && !errb) ? "OK" : | |
428 | - !erra ? "ERRB" : | |
429 | - !errb ? "ERRA" : | |
430 | - "ERRAB"); | |
431 | + if ( i < NDISKS-2 && j == NDISKS-1 ) { | |
432 | + /* We don't implement the DQ failure scenario, since it's | |
433 | + equivalent to a RAID-5 failure (XOR, then recompute Q) */ | |
434 | + } else { | |
435 | + printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", | |
436 | + raid6_call.name, | |
437 | + i, (i==NDISKS-2)?'P':'D', | |
438 | + j, (j==NDISKS-1)?'Q':(j==NDISKS-2)?'P':'D', | |
439 | + (!erra && !errb) ? "OK" : | |
440 | + !erra ? "ERRB" : | |
441 | + !errb ? "ERRA" : | |
442 | + "ERRAB"); | |
443 | + } | |
444 | ||
445 | dataptrs[i] = data[i]; | |
446 | dataptrs[j] = data[j]; |