]>
Commit | Line | Data |
---|---|---|
cb791f4e | 1 | |
2 | This is a rehash of a patch from Andreas Gruenbacher <agruen@suse.de> | |
3 | to add qsort as a kernel library function. Right now the only user of | |
4 | this is XFS others users are expected. | |
5 | ||
6 | The stack utilization is fairly modest and the function is not | |
7 | recursive. | |
8 | ||
9 | I stupidly took the gnu-formatted function and ran Lindent over it | |
10 | which made a bit of a mess (I manually fixed most of this and am | |
11 | guilty of editing the diff...) | |
12 | ||
13 | ||
14 | --cw | |
15 | ||
16 | ||
17 | include/linux/kernel.h | 2 | |
18 | lib/Kconfig | 3 | |
19 | lib/Makefile | 1 | |
20 | lib/qsort.c | 239 +++++++++++++++++++++++++++++++++++++++++++++++++ | |
21 | 4 files changed, 245 insertions(+) | |
22 | ||
23 | ||
24 | ||
25 | diff -Nru a/include/linux/kernel.h b/include/linux/kernel.h | |
26 | --- a/include/linux/kernel.h Sun May 9 20:27:15 2004 | |
27 | +++ b/include/linux/kernel.h Sun May 9 20:27:15 2004 | |
28 | @@ -80,6 +80,8 @@ | |
29 | __attribute__ ((format (scanf,2,3))); | |
30 | extern int vsscanf(const char *, const char *, va_list); | |
31 | ||
32 | +extern void qsort(void *, size_t, size_t, int (*)(const void *,const void *)); | |
33 | + | |
34 | extern int get_option(char **str, int *pint); | |
35 | extern char *get_options(const char *str, int nints, int *ints); | |
36 | extern unsigned long long memparse(char *ptr, char **retptr); | |
37 | diff -Nru a/lib/Kconfig b/lib/Kconfig | |
38 | --- a/lib/Kconfig Sun May 9 20:27:15 2004 | |
39 | +++ b/lib/Kconfig Sun May 9 20:27:15 2004 | |
40 | @@ -21,6 +21,9 @@ | |
41 | require M here. See Castagnoli93. | |
42 | Module will be libcrc32c. | |
43 | ||
44 | +config QSORT | |
45 | + bool "Quick Sort" | |
46 | + | |
47 | # | |
48 | # compression support is select'ed if needed | |
49 | # | |
50 | diff -Nru a/lib/Makefile b/lib/Makefile | |
51 | --- a/lib/Makefile Sun May 9 20:27:15 2004 | |
52 | +++ b/lib/Makefile Sun May 9 20:27:15 2004 | |
53 | @@ -20,6 +20,7 @@ | |
54 | ||
55 | obj-$(CONFIG_CRC32) += crc32.o | |
56 | obj-$(CONFIG_LIBCRC32C) += libcrc32c.o | |
57 | +obj-$(CONFIG_QSORT) += qsort.o | |
58 | ||
59 | obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ | |
60 | obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ | |
61 | diff -Nru a/lib/qsort.c b/lib/qsort.c | |
62 | --- /dev/null Wed Dec 31 16:00:00 1969 | |
63 | +++ b/lib/qsort.c Sun May 9 20:27:15 2004 | |
64 | @@ -0,0 +1,238 @@ | |
65 | +/* | |
66 | + * qsort implementation for the Linux kernel. | |
67 | + * | |
68 | + * Original implementation taken form glibc and credited to Douglas | |
69 | + * C. Schmidt (schmidt@ics.uci.edu). | |
70 | + * | |
71 | + * This source code is licensed under the GNU General Public License, | |
72 | + * Version 2. See the file COPYING for more details. | |
73 | + */ | |
74 | + | |
75 | +/* | |
76 | + * If you consider tuning this algorithm, you should consult first: | |
77 | + * Engineering a sort function; Jon Bentley and M. Douglas McIlroy; | |
78 | + * Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. | |
79 | + */ | |
80 | + | |
81 | +# include <linux/module.h> | |
82 | +# include <linux/slab.h> | |
83 | +# include <linux/string.h> | |
84 | + | |
85 | +MODULE_LICENSE("GPL"); | |
86 | + | |
87 | +/* Byte-wise swap two items of size SIZE. */ | |
88 | +#define SWAP(a, b, size) \ | |
89 | + do { \ | |
90 | + size_t __size = (size); \ | |
91 | + char *__a = (a), *__b = (b); \ | |
92 | + do { \ | |
93 | + char __tmp = *__a; \ | |
94 | + *__a++ = *__b; \ | |
95 | + *__b++ = __tmp; \ | |
96 | + } while (--__size > 0); \ | |
97 | + } while (0) | |
98 | + | |
99 | +/* Discontinue quicksort algorithm when partition gets below this | |
100 | + size. This particular magic number was chosen to work best on a | |
101 | + Sun 4/260. */ | |
102 | +#define MAX_THRESH 4 | |
103 | + | |
104 | +/* Stack node declarations used to store unfulfilled partition | |
105 | + * obligations. */ | |
106 | +typedef struct { | |
107 | + char *lo; | |
108 | + char *hi; | |
109 | +} stack_node; | |
110 | + | |
111 | +/* The next 5 #defines implement a very fast in-line stack | |
112 | + * abstraction. The stack needs log (total_elements) entries (we | |
113 | + * could even subtract log(MAX_THRESH)). Since total_elements has | |
114 | + * type size_t, we get as upper bound for log (total_elements): bits | |
115 | + * per byte (CHAR_BIT) * sizeof(size_t). */ | |
116 | + | |
117 | +#define CHAR_BIT 8 | |
118 | +#define STACK_SIZE (CHAR_BIT * sizeof(size_t)) | |
119 | +#define PUSH(low, high) ((top->lo = (low)), (top->hi = (high)), ++top) | |
120 | +#define POP(low, high) (--top, (low = top->lo), (high = top->hi)) | |
121 | +#define STACK_NOT_EMPTY (stack < top) | |
122 | + | |
123 | +/* Order size using quicksort. This implementation incorporates four | |
124 | + optimizations discussed in Sedgewick: | |
125 | + | |
126 | + 1. Non-recursive, using an explicit stack of pointer that store the | |
127 | + next array partition to sort. To save time, this maximum amount | |
128 | + of space required to store an array of SIZE_MAX is allocated on | |
129 | + the stack. Assuming a 32-bit (64 bit) integer for size_t, this | |
130 | + needs only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: | |
131 | + 1024 bytes). Pretty cheap, actually. | |
132 | + | |
133 | + 2. Chose the pivot element using a median-of-three decision tree. | |
134 | + This reduces the probability of selecting a bad pivot value and | |
135 | + eliminates certain extraneous comparisons. | |
136 | + | |
137 | + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving | |
138 | + insertion sort to order the MAX_THRESH items within each | |
139 | + partition. This is a big win, since insertion sort is faster | |
140 | + for small, mostly sorted array segments. | |
141 | + | |
142 | + 4. The larger of the two sub-partitions is always pushed onto the | |
143 | + stack first, with the algorithm then concentrating on the | |
144 | + smaller partition. This *guarantees* no more than log | |
145 | + (total_elems) stack size is needed (actually O(1) in this case)! | |
146 | +*/ | |
147 | + | |
148 | +void | |
149 | +qsort(void *const pbase, size_t total_elems, size_t size, | |
150 | + int (*cmp)(const void*, const void*)) | |
151 | +{ | |
152 | + char *base_ptr = (char *)pbase; | |
153 | + | |
154 | + const size_t max_thresh = MAX_THRESH * size; | |
155 | + | |
156 | + /* Avoid lossage with unsigned arithmetic below. */ | |
157 | + if (total_elems == 0) { | |
158 | + return; | |
159 | + } | |
160 | + | |
161 | + if (total_elems > MAX_THRESH) { | |
162 | + char *lo = base_ptr; | |
163 | + char *hi = &lo[size * (total_elems - 1)]; | |
164 | + stack_node stack[STACK_SIZE]; | |
165 | + stack_node *top = stack + 1; | |
166 | + | |
167 | + while (STACK_NOT_EMPTY) { | |
168 | + char *left_ptr; | |
169 | + char *right_ptr; | |
170 | + | |
171 | + /* Select median value from among LO, MID, and | |
172 | + HI. Rearrange LO and HI so the three values | |
173 | + are sorted. This lowers the probability of | |
174 | + picking a pathological pivot value and | |
175 | + skips a comparison for both the LEFT_PTR | |
176 | + and RIGHT_PTR in the while loops. */ | |
177 | + | |
178 | + char *mid = lo + size * ((hi - lo) / size >> 1); | |
179 | + | |
180 | + if ((*cmp)((void*)mid, (void*)lo) < 0) | |
181 | + SWAP(mid, lo, size); | |
182 | + if ((*cmp)((void*)hi, (void*)mid) < 0) | |
183 | + SWAP(mid, hi, size); | |
184 | + else | |
185 | + goto jump_over; | |
186 | + if ((*cmp)((void*)mid, (void*)lo) < 0) | |
187 | + SWAP(mid, lo, size); | |
188 | + jump_over: | |
189 | + | |
190 | + left_ptr = lo + size; | |
191 | + right_ptr = hi - size; | |
192 | + | |
193 | + /* Here's the famous ``collapse the walls'' | |
194 | + section of quicksort. Gotta like those | |
195 | + tight inner loops! They are the main | |
196 | + reason that this algorithm runs much faster | |
197 | + than others. */ | |
198 | + do { | |
199 | + while ((*cmp)((void*)left_ptr, (void*)mid) < 0) | |
200 | + left_ptr += size; | |
201 | + | |
202 | + while ((*cmp)((void*)mid, (void*)right_ptr) < 0) | |
203 | + right_ptr -= size; | |
204 | + | |
205 | + if (left_ptr < right_ptr) { | |
206 | + SWAP(left_ptr, right_ptr, size); | |
207 | + if (mid == left_ptr) | |
208 | + mid = right_ptr; | |
209 | + else if (mid == right_ptr) | |
210 | + mid = left_ptr; | |
211 | + left_ptr += size; | |
212 | + right_ptr -= size; | |
213 | + } else if (left_ptr == right_ptr) { | |
214 | + left_ptr += size; | |
215 | + right_ptr -= size; | |
216 | + break; | |
217 | + } | |
218 | + } | |
219 | + while (left_ptr <= right_ptr); | |
220 | + | |
221 | + /* Set up pointers for next iteration. First | |
222 | + determine whether left and right partitions | |
223 | + are below the threshold size. If so, | |
224 | + ignore one or both. Otherwise, push the | |
225 | + larger partition's bounds on the stack and | |
226 | + continue sorting the smaller one. */ | |
227 | + | |
228 | + if ((size_t) (right_ptr - lo) <= max_thresh) { | |
229 | + if ((size_t) (hi - left_ptr) <= max_thresh) | |
230 | + /* Ignore both small partitions. */ | |
231 | + POP(lo, hi); | |
232 | + else | |
233 | + /* Ignore small left partition. */ | |
234 | + lo = left_ptr; | |
235 | + } else if ((size_t) (hi - left_ptr) <= max_thresh) | |
236 | + /* Ignore small right partition. */ | |
237 | + hi = right_ptr; | |
238 | + else if ((right_ptr - lo) > (hi - left_ptr)) { | |
239 | + /* Push larger left partition indices. */ | |
240 | + PUSH(lo, right_ptr); | |
241 | + lo = left_ptr; | |
242 | + } else { | |
243 | + /* Push larger right partition indices. */ | |
244 | + PUSH(left_ptr, hi); | |
245 | + hi = right_ptr; | |
246 | + } | |
247 | + } | |
248 | + } | |
249 | + | |
250 | + /* Once the BASE_PTR array is partially sorted by quicksort | |
251 | + the rest is completely sorted using insertion sort, since | |
252 | + this is efficient for partitions below MAX_THRESH | |
253 | + size. BASE_PTR points to the beginning of the array to | |
254 | + sort, and END_PTR points at the very last element in the | |
255 | + array (*not* one beyond it!). */ | |
256 | + | |
257 | + { | |
258 | + char *end_ptr = &base_ptr[size * (total_elems - 1)]; | |
259 | + char *tmp_ptr = base_ptr; | |
260 | + char *thresh = min(end_ptr, base_ptr + max_thresh); | |
261 | + char *run_ptr; | |
262 | + | |
263 | + /* Find smallest element in first threshold and place | |
264 | + it at the array's beginning. This is the smallest | |
265 | + array element, and the operation speeds up | |
266 | + insertion sort's inner loop. */ | |
267 | + | |
268 | + for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) | |
269 | + if ((*cmp)((void*)run_ptr, (void*)tmp_ptr) < 0) | |
270 | + tmp_ptr = run_ptr; | |
271 | + | |
272 | + if (tmp_ptr != base_ptr) | |
273 | + SWAP(tmp_ptr, base_ptr, size); | |
274 | + | |
275 | + /* Insertion sort, running from left-hand-side up to | |
276 | + * right-hand-side. */ | |
277 | + | |
278 | + run_ptr = base_ptr + size; | |
279 | + while ((run_ptr += size) <= end_ptr) { | |
280 | + tmp_ptr = run_ptr - size; | |
281 | + while ((*cmp)((void*)run_ptr, (void*)tmp_ptr) < 0) | |
282 | + tmp_ptr -= size; | |
283 | + | |
284 | + tmp_ptr += size; | |
285 | + if (tmp_ptr != run_ptr) { | |
286 | + char *trav; | |
287 | + | |
288 | + trav = run_ptr + size; | |
289 | + while (--trav >= run_ptr) { | |
290 | + char c = *trav; | |
291 | + char *hi, *lo; | |
292 | + | |
293 | + for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) | |
294 | + *hi = *lo; | |
295 | + *hi = c; | |
296 | + } | |
297 | + } | |
298 | + } | |
299 | + } | |
300 | +} | |
301 | + | |
302 | +EXPORT_SYMBOL_GPL(qsort); |