2 This is a rehash of a patch from Andreas Gruenbacher <agruen@suse.de>
3 to add qsort as a kernel library function. Right now the only user of
4 this is XFS others users are expected.
6 The stack utilization is fairly modest and the function is not
9 I stupidly took the gnu-formatted function and ran Lindent over it
10 which made a bit of a mess (I manually fixed most of this and am
11 guilty of editing the diff...)
17 include/linux/kernel.h | 2
20 lib/qsort.c | 239 +++++++++++++++++++++++++++++++++++++++++++++++++
21 4 files changed, 245 insertions(+)
25 diff -Nru a/include/linux/kernel.h b/include/linux/kernel.h
26 --- a/include/linux/kernel.h Sun May 9 20:27:15 2004
27 +++ b/include/linux/kernel.h Sun May 9 20:27:15 2004
29 __attribute__ ((format (scanf,2,3)));
30 extern int vsscanf(const char *, const char *, va_list);
32 +extern void qsort(void *, size_t, size_t, int (*)(const void *,const void *));
34 extern int get_option(char **str, int *pint);
35 extern char *get_options(const char *str, int nints, int *ints);
36 extern unsigned long long memparse(char *ptr, char **retptr);
37 diff -Nru a/lib/Kconfig b/lib/Kconfig
38 --- a/lib/Kconfig Sun May 9 20:27:15 2004
39 +++ b/lib/Kconfig Sun May 9 20:27:15 2004
41 require M here. See Castagnoli93.
42 Module will be libcrc32c.
48 # compression support is select'ed if needed
50 diff -Nru a/lib/Makefile b/lib/Makefile
51 --- a/lib/Makefile Sun May 9 20:27:15 2004
52 +++ b/lib/Makefile Sun May 9 20:27:15 2004
55 obj-$(CONFIG_CRC32) += crc32.o
56 obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
57 +obj-$(CONFIG_QSORT) += qsort.o
59 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
60 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
61 diff -Nru a/lib/qsort.c b/lib/qsort.c
62 --- /dev/null Wed Dec 31 16:00:00 1969
63 +++ b/lib/qsort.c Sun May 9 20:27:15 2004
66 + * qsort implementation for the Linux kernel.
68 + * Original implementation taken form glibc and credited to Douglas
69 + * C. Schmidt (schmidt@ics.uci.edu).
71 + * This source code is licensed under the GNU General Public License,
72 + * Version 2. See the file COPYING for more details.
76 + * If you consider tuning this algorithm, you should consult first:
77 + * Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
78 + * Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.
81 +# include <linux/module.h>
82 +# include <linux/slab.h>
83 +# include <linux/string.h>
85 +MODULE_LICENSE("GPL");
87 +/* Byte-wise swap two items of size SIZE. */
88 +#define SWAP(a, b, size) \
90 + size_t __size = (size); \
91 + char *__a = (a), *__b = (b); \
93 + char __tmp = *__a; \
96 + } while (--__size > 0); \
99 +/* Discontinue quicksort algorithm when partition gets below this
100 + size. This particular magic number was chosen to work best on a
102 +#define MAX_THRESH 4
104 +/* Stack node declarations used to store unfulfilled partition
111 +/* The next 5 #defines implement a very fast in-line stack
112 + * abstraction. The stack needs log (total_elements) entries (we
113 + * could even subtract log(MAX_THRESH)). Since total_elements has
114 + * type size_t, we get as upper bound for log (total_elements): bits
115 + * per byte (CHAR_BIT) * sizeof(size_t). */
118 +#define STACK_SIZE (CHAR_BIT * sizeof(size_t))
119 +#define PUSH(low, high) ((top->lo = (low)), (top->hi = (high)), ++top)
120 +#define POP(low, high) (--top, (low = top->lo), (high = top->hi))
121 +#define STACK_NOT_EMPTY (stack < top)
123 +/* Order size using quicksort. This implementation incorporates four
124 + optimizations discussed in Sedgewick:
126 + 1. Non-recursive, using an explicit stack of pointer that store the
127 + next array partition to sort. To save time, this maximum amount
128 + of space required to store an array of SIZE_MAX is allocated on
129 + the stack. Assuming a 32-bit (64 bit) integer for size_t, this
130 + needs only 32 * sizeof(stack_node) == 256 bytes (for 64 bit:
131 + 1024 bytes). Pretty cheap, actually.
133 + 2. Chose the pivot element using a median-of-three decision tree.
134 + This reduces the probability of selecting a bad pivot value and
135 + eliminates certain extraneous comparisons.
137 + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
138 + insertion sort to order the MAX_THRESH items within each
139 + partition. This is a big win, since insertion sort is faster
140 + for small, mostly sorted array segments.
142 + 4. The larger of the two sub-partitions is always pushed onto the
143 + stack first, with the algorithm then concentrating on the
144 + smaller partition. This *guarantees* no more than log
145 + (total_elems) stack size is needed (actually O(1) in this case)!
149 +qsort(void *const pbase, size_t total_elems, size_t size,
150 + int (*cmp)(const void*, const void*))
152 + char *base_ptr = (char *)pbase;
154 + const size_t max_thresh = MAX_THRESH * size;
156 + /* Avoid lossage with unsigned arithmetic below. */
157 + if (total_elems == 0) {
161 + if (total_elems > MAX_THRESH) {
162 + char *lo = base_ptr;
163 + char *hi = &lo[size * (total_elems - 1)];
164 + stack_node stack[STACK_SIZE];
165 + stack_node *top = stack + 1;
167 + while (STACK_NOT_EMPTY) {
171 + /* Select median value from among LO, MID, and
172 + HI. Rearrange LO and HI so the three values
173 + are sorted. This lowers the probability of
174 + picking a pathological pivot value and
175 + skips a comparison for both the LEFT_PTR
176 + and RIGHT_PTR in the while loops. */
178 + char *mid = lo + size * ((hi - lo) / size >> 1);
180 + if ((*cmp)((void*)mid, (void*)lo) < 0)
181 + SWAP(mid, lo, size);
182 + if ((*cmp)((void*)hi, (void*)mid) < 0)
183 + SWAP(mid, hi, size);
186 + if ((*cmp)((void*)mid, (void*)lo) < 0)
187 + SWAP(mid, lo, size);
190 + left_ptr = lo + size;
191 + right_ptr = hi - size;
193 + /* Here's the famous ``collapse the walls''
194 + section of quicksort. Gotta like those
195 + tight inner loops! They are the main
196 + reason that this algorithm runs much faster
199 + while ((*cmp)((void*)left_ptr, (void*)mid) < 0)
202 + while ((*cmp)((void*)mid, (void*)right_ptr) < 0)
205 + if (left_ptr < right_ptr) {
206 + SWAP(left_ptr, right_ptr, size);
207 + if (mid == left_ptr)
209 + else if (mid == right_ptr)
213 + } else if (left_ptr == right_ptr) {
219 + while (left_ptr <= right_ptr);
221 + /* Set up pointers for next iteration. First
222 + determine whether left and right partitions
223 + are below the threshold size. If so,
224 + ignore one or both. Otherwise, push the
225 + larger partition's bounds on the stack and
226 + continue sorting the smaller one. */
228 + if ((size_t) (right_ptr - lo) <= max_thresh) {
229 + if ((size_t) (hi - left_ptr) <= max_thresh)
230 + /* Ignore both small partitions. */
233 + /* Ignore small left partition. */
235 + } else if ((size_t) (hi - left_ptr) <= max_thresh)
236 + /* Ignore small right partition. */
238 + else if ((right_ptr - lo) > (hi - left_ptr)) {
239 + /* Push larger left partition indices. */
240 + PUSH(lo, right_ptr);
243 + /* Push larger right partition indices. */
244 + PUSH(left_ptr, hi);
250 + /* Once the BASE_PTR array is partially sorted by quicksort
251 + the rest is completely sorted using insertion sort, since
252 + this is efficient for partitions below MAX_THRESH
253 + size. BASE_PTR points to the beginning of the array to
254 + sort, and END_PTR points at the very last element in the
255 + array (*not* one beyond it!). */
258 + char *end_ptr = &base_ptr[size * (total_elems - 1)];
259 + char *tmp_ptr = base_ptr;
260 + char *thresh = min(end_ptr, base_ptr + max_thresh);
263 + /* Find smallest element in first threshold and place
264 + it at the array's beginning. This is the smallest
265 + array element, and the operation speeds up
266 + insertion sort's inner loop. */
268 + for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
269 + if ((*cmp)((void*)run_ptr, (void*)tmp_ptr) < 0)
272 + if (tmp_ptr != base_ptr)
273 + SWAP(tmp_ptr, base_ptr, size);
275 + /* Insertion sort, running from left-hand-side up to
276 + * right-hand-side. */
278 + run_ptr = base_ptr + size;
279 + while ((run_ptr += size) <= end_ptr) {
280 + tmp_ptr = run_ptr - size;
281 + while ((*cmp)((void*)run_ptr, (void*)tmp_ptr) < 0)
285 + if (tmp_ptr != run_ptr) {
288 + trav = run_ptr + size;
289 + while (--trav >= run_ptr) {
293 + for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
302 +EXPORT_SYMBOL_GPL(qsort);