]>
Commit | Line | Data |
---|---|---|
537831f9 AM |
1 | diff -Naur linux-2.6.24.orig/include/uapi/linux/pkt_sched.h linux-2.6.24/include/uapi/linux/pkt_sched.h |
2 | --- linux-2.6.24.orig/include/uapi/linux/pkt_sched.h 2008-01-24 14:58:37.000000000 -0800 | |
3 | +++ linux-2.6.24/include/uapi/linux/pkt_sched.h 2008-01-28 00:27:12.000000000 -0800 | |
2380c486 JR |
4 | @@ -157,6 +157,33 @@ |
5 | * to change these parameters in compile time. | |
6 | */ | |
7 | ||
8 | +/* ESFQ section */ | |
9 | + | |
10 | +enum | |
11 | +{ | |
12 | + /* traditional */ | |
13 | + TCA_ESFQ_HASH_CLASSIC, | |
14 | + TCA_ESFQ_HASH_DST, | |
15 | + TCA_ESFQ_HASH_SRC, | |
16 | + TCA_ESFQ_HASH_FWMARK, | |
17 | + /* conntrack */ | |
18 | + TCA_ESFQ_HASH_CTORIGDST, | |
19 | + TCA_ESFQ_HASH_CTORIGSRC, | |
20 | + TCA_ESFQ_HASH_CTREPLDST, | |
21 | + TCA_ESFQ_HASH_CTREPLSRC, | |
22 | + TCA_ESFQ_HASH_CTNATCHG, | |
23 | +}; | |
24 | + | |
25 | +struct tc_esfq_qopt | |
26 | +{ | |
27 | + unsigned quantum; /* Bytes per round allocated to flow */ | |
28 | + int perturb_period; /* Period of hash perturbation */ | |
29 | + __u32 limit; /* Maximal packets in queue */ | |
30 | + unsigned divisor; /* Hash divisor */ | |
31 | + unsigned flows; /* Maximal number of flows */ | |
32 | + unsigned hash_kind; /* Hash function to use for flow identification */ | |
33 | +}; | |
34 | + | |
35 | /* RED section */ | |
36 | ||
37 | enum | |
38 | diff -Naur linux-2.6.24.orig/net/sched/Kconfig linux-2.6.24/net/sched/Kconfig | |
39 | --- linux-2.6.24.orig/net/sched/Kconfig 2008-01-24 14:58:37.000000000 -0800 | |
40 | +++ linux-2.6.24/net/sched/Kconfig 2008-01-28 00:27:12.000000000 -0800 | |
41 | @@ -139,6 +139,37 @@ | |
42 | To compile this code as a module, choose M here: the | |
43 | module will be called sch_sfq. | |
44 | ||
45 | +config NET_SCH_ESFQ | |
46 | + tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" | |
47 | + ---help--- | |
48 | + Say Y here if you want to use the Enhanced Stochastic Fairness | |
49 | + Queueing (ESFQ) packet scheduling algorithm for some of your network | |
50 | + devices or as a leaf discipline for a classful qdisc such as HTB or | |
51 | + CBQ (see the top of <file:net/sched/sch_esfq.c> for details and | |
52 | + references to the SFQ algorithm). | |
53 | + | |
54 | + This is an enchanced SFQ version which allows you to control some | |
55 | + hardcoded values in the SFQ scheduler. | |
56 | + | |
57 | + ESFQ also adds control of the hash function used to identify packet | |
58 | + flows. The original SFQ discipline hashes by connection; ESFQ add | |
59 | + several other hashing methods, such as by src IP or by dst IP, which | |
60 | + can be more fair to users in some networking situations. | |
61 | + | |
62 | + To compile this code as a module, choose M here: the | |
63 | + module will be called sch_esfq. | |
64 | + | |
65 | +config NET_SCH_ESFQ_NFCT | |
66 | + bool "Connection Tracking Hash Types" | |
67 | + depends on NET_SCH_ESFQ && NF_CONNTRACK | |
68 | + ---help--- | |
69 | + Say Y here to enable support for hashing based on netfilter connection | |
70 | + tracking information. This is useful for a router that is also using | |
71 | + NAT to connect privately-addressed hosts to the Internet. If you want | |
72 | + to provide fair distribution of upstream bandwidth, ESFQ must use | |
73 | + connection tracking information, since all outgoing packets will share | |
74 | + the same source address. | |
75 | + | |
76 | config NET_SCH_TEQL | |
77 | tristate "True Link Equalizer (TEQL)" | |
78 | ---help--- | |
79 | diff -Naur linux-2.6.24.orig/net/sched/Makefile linux-2.6.24/net/sched/Makefile | |
80 | --- linux-2.6.24.orig/net/sched/Makefile 2008-01-24 14:58:37.000000000 -0800 | |
81 | +++ linux-2.6.24/net/sched/Makefile 2008-01-28 00:27:12.000000000 -0800 | |
82 | @@ -23,6 +23,7 @@ | |
83 | obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o | |
84 | obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o | |
85 | obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o | |
86 | +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o | |
87 | obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o | |
88 | obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o | |
89 | obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o | |
90 | diff -Naur linux-2.6.24.orig/net/sched/sch_esfq.c linux-2.6.24/net/sched/sch_esfq.c | |
91 | --- linux-2.6.24.orig/net/sched/sch_esfq.c 1969-12-31 16:00:00.000000000 -0800 | |
92 | +++ linux-2.6.24/net/sched/sch_esfq.c 2008-01-28 00:27:22.000000000 -0800 | |
fd53d4a1 | 93 | @@ -0,0 +1,700 @@ |
2380c486 JR |
94 | +/* |
95 | + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. | |
96 | + * | |
97 | + * This program is free software; you can redistribute it and/or | |
98 | + * modify it under the terms of the GNU General Public License | |
99 | + * as published by the Free Software Foundation; either version | |
100 | + * 2 of the License, or (at your option) any later version. | |
101 | + * | |
102 | + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | |
103 | + * | |
104 | + * Changes: Alexander Atanasov, <alex@ssi.bg> | |
105 | + * Added dynamic depth,limit,divisor,hash_kind options. | |
106 | + * Added dst and src hashes. | |
107 | + * | |
108 | + * Alexander Clouter, <alex@digriz.org.uk> | |
109 | + * Ported ESFQ to Linux 2.6. | |
110 | + * | |
111 | + * Corey Hickey, <bugfood-c@fatooh.org> | |
112 | + * Maintenance of the Linux 2.6 port. | |
113 | + * Added fwmark hash (thanks to Robert Kurjata). | |
114 | + * Added usage of jhash. | |
115 | + * Added conntrack support. | |
116 | + * Added ctnatchg hash (thanks to Ben Pfountz). | |
117 | + */ | |
118 | + | |
119 | +#include <linux/module.h> | |
120 | +#include <asm/uaccess.h> | |
92d182d2 | 121 | + |
2380c486 JR |
122 | +#include <linux/bitops.h> |
123 | +#include <linux/types.h> | |
124 | +#include <linux/kernel.h> | |
125 | +#include <linux/jiffies.h> | |
126 | +#include <linux/string.h> | |
127 | +#include <linux/mm.h> | |
128 | +#include <linux/socket.h> | |
129 | +#include <linux/sockios.h> | |
130 | +#include <linux/in.h> | |
131 | +#include <linux/errno.h> | |
132 | +#include <linux/interrupt.h> | |
133 | +#include <linux/if_ether.h> | |
134 | +#include <linux/inet.h> | |
135 | +#include <linux/netdevice.h> | |
136 | +#include <linux/etherdevice.h> | |
137 | +#include <linux/notifier.h> | |
138 | +#include <linux/init.h> | |
139 | +#include <net/ip.h> | |
140 | +#include <net/netlink.h> | |
141 | +#include <linux/ipv6.h> | |
142 | +#include <net/route.h> | |
143 | +#include <linux/skbuff.h> | |
144 | +#include <net/sock.h> | |
145 | +#include <net/pkt_sched.h> | |
146 | +#include <linux/jhash.h> | |
147 | +#include <net/netfilter/nf_conntrack.h> | |
148 | + | |
149 | +/* Stochastic Fairness Queuing algorithm. | |
150 | + For more comments look at sch_sfq.c. | |
151 | + The difference is that you can change limit, depth, | |
152 | + hash table size and choose alternate hash types. | |
153 | + | |
154 | + classic: same as in sch_sfq.c | |
155 | + dst: destination IP address | |
156 | + src: source IP address | |
157 | + fwmark: netfilter mark value | |
158 | + ctorigdst: original destination IP address | |
159 | + ctorigsrc: original source IP address | |
160 | + ctrepldst: reply destination IP address | |
161 | + ctreplsrc: reply source IP | |
162 | + | |
163 | +*/ | |
164 | + | |
165 | +#define ESFQ_HEAD 0 | |
166 | +#define ESFQ_TAIL 1 | |
167 | + | |
168 | +/* This type should contain at least SFQ_DEPTH*2 values */ | |
169 | +typedef unsigned int esfq_index; | |
170 | + | |
171 | +struct esfq_head | |
172 | +{ | |
173 | + esfq_index next; | |
174 | + esfq_index prev; | |
175 | +}; | |
176 | + | |
177 | +struct esfq_sched_data | |
178 | +{ | |
179 | +/* Parameters */ | |
180 | + int perturb_period; | |
181 | + unsigned quantum; /* Allotment per round: MUST BE >= MTU */ | |
182 | + int limit; | |
183 | + unsigned depth; | |
184 | + unsigned hash_divisor; | |
185 | + unsigned hash_kind; | |
186 | +/* Variables */ | |
187 | + struct timer_list perturb_timer; | |
188 | + int perturbation; | |
189 | + esfq_index tail; /* Index of current slot in round */ | |
190 | + esfq_index max_depth; /* Maximal depth */ | |
191 | + | |
192 | + esfq_index *ht; /* Hash table */ | |
193 | + esfq_index *next; /* Active slots link */ | |
194 | + short *allot; /* Current allotment per slot */ | |
195 | + unsigned short *hash; /* Hash value indexed by slots */ | |
196 | + struct sk_buff_head *qs; /* Slot queue */ | |
197 | + struct esfq_head *dep; /* Linked list of slots, indexed by depth */ | |
198 | +}; | |
199 | + | |
200 | +/* This contains the info we will hash. */ | |
201 | +struct esfq_packet_info | |
202 | +{ | |
203 | + u32 proto; /* protocol or port */ | |
204 | + u32 src; /* source from packet header */ | |
205 | + u32 dst; /* destination from packet header */ | |
206 | + u32 ctorigsrc; /* original source from conntrack */ | |
207 | + u32 ctorigdst; /* original destination from conntrack */ | |
208 | + u32 ctreplsrc; /* reply source from conntrack */ | |
209 | + u32 ctrepldst; /* reply destination from conntrack */ | |
210 | + u32 mark; /* netfilter mark (fwmark) */ | |
211 | +}; | |
212 | + | |
213 | +static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) | |
214 | +{ | |
215 | + return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); | |
216 | +} | |
217 | + | |
218 | +static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) | |
219 | +{ | |
220 | + return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); | |
221 | +} | |
222 | + | |
223 | +static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) | |
224 | +{ | |
225 | + return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); | |
226 | +} | |
227 | + | |
228 | +static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) | |
229 | +{ | |
230 | + struct esfq_packet_info info; | |
231 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
232 | + enum ip_conntrack_info ctinfo; | |
233 | + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | |
234 | +#endif | |
235 | + | |
236 | + switch (skb->protocol) { | |
237 | + case __constant_htons(ETH_P_IP): | |
238 | + { | |
239 | + struct iphdr *iph = ip_hdr(skb); | |
240 | + info.dst = iph->daddr; | |
241 | + info.src = iph->saddr; | |
242 | + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && | |
243 | + (iph->protocol == IPPROTO_TCP || | |
244 | + iph->protocol == IPPROTO_UDP || | |
245 | + iph->protocol == IPPROTO_SCTP || | |
246 | + iph->protocol == IPPROTO_DCCP || | |
247 | + iph->protocol == IPPROTO_ESP)) | |
248 | + info.proto = *(((u32*)iph) + iph->ihl); | |
249 | + else | |
250 | + info.proto = iph->protocol; | |
251 | + break; | |
252 | + } | |
253 | + case __constant_htons(ETH_P_IPV6): | |
254 | + { | |
255 | + struct ipv6hdr *iph = ipv6_hdr(skb); | |
256 | + /* Hash ipv6 addresses into a u32. This isn't ideal, | |
257 | + * but the code is simple. */ | |
258 | + info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); | |
259 | + info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); | |
260 | + if (iph->nexthdr == IPPROTO_TCP || | |
261 | + iph->nexthdr == IPPROTO_UDP || | |
262 | + iph->nexthdr == IPPROTO_SCTP || | |
263 | + iph->nexthdr == IPPROTO_DCCP || | |
264 | + iph->nexthdr == IPPROTO_ESP) | |
265 | + info.proto = *(u32*)&iph[1]; | |
266 | + else | |
267 | + info.proto = iph->nexthdr; | |
268 | + break; | |
269 | + } | |
270 | + default: | |
3b4370a0 | 271 | + info.dst = (u32)(unsigned long)skb_dst(skb); |
2380c486 JR |
272 | + info.src = (u32)(unsigned long)skb->sk; |
273 | + info.proto = skb->protocol; | |
274 | + } | |
275 | + | |
276 | + info.mark = skb->mark; | |
277 | + | |
278 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
279 | + /* defaults if there is no conntrack info */ | |
280 | + info.ctorigsrc = info.src; | |
281 | + info.ctorigdst = info.dst; | |
282 | + info.ctreplsrc = info.dst; | |
283 | + info.ctrepldst = info.src; | |
284 | + /* collect conntrack info */ | |
285 | + if (ct && ct != &nf_conntrack_untracked) { | |
286 | + if (skb->protocol == __constant_htons(ETH_P_IP)) { | |
287 | + info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; | |
288 | + info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; | |
289 | + info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; | |
290 | + info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; | |
291 | + } | |
292 | + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { | |
293 | + /* Again, hash ipv6 addresses into a single u32. */ | |
294 | + info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); | |
295 | + info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); | |
296 | + info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); | |
297 | + info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); | |
298 | + } | |
299 | + | |
300 | + } | |
301 | +#endif | |
302 | + | |
303 | + switch(q->hash_kind) { | |
304 | + case TCA_ESFQ_HASH_CLASSIC: | |
305 | + return esfq_jhash_3words(q, info.dst, info.src, info.proto); | |
306 | + case TCA_ESFQ_HASH_DST: | |
307 | + return esfq_jhash_1word(q, info.dst); | |
308 | + case TCA_ESFQ_HASH_SRC: | |
309 | + return esfq_jhash_1word(q, info.src); | |
310 | + case TCA_ESFQ_HASH_FWMARK: | |
311 | + return esfq_jhash_1word(q, info.mark); | |
312 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
313 | + case TCA_ESFQ_HASH_CTORIGDST: | |
314 | + return esfq_jhash_1word(q, info.ctorigdst); | |
315 | + case TCA_ESFQ_HASH_CTORIGSRC: | |
316 | + return esfq_jhash_1word(q, info.ctorigsrc); | |
317 | + case TCA_ESFQ_HASH_CTREPLDST: | |
318 | + return esfq_jhash_1word(q, info.ctrepldst); | |
319 | + case TCA_ESFQ_HASH_CTREPLSRC: | |
320 | + return esfq_jhash_1word(q, info.ctreplsrc); | |
321 | + case TCA_ESFQ_HASH_CTNATCHG: | |
322 | + { | |
323 | + if (info.ctorigdst == info.ctreplsrc) | |
324 | + return esfq_jhash_1word(q, info.ctorigsrc); | |
325 | + return esfq_jhash_1word(q, info.ctreplsrc); | |
326 | + } | |
327 | +#endif | |
328 | + default: | |
329 | + if (net_ratelimit()) | |
330 | + printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); | |
331 | + } | |
332 | + return esfq_jhash_3words(q, info.dst, info.src, info.proto); | |
333 | +} | |
334 | + | |
335 | +static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) | |
336 | +{ | |
337 | + esfq_index p, n; | |
338 | + int d = q->qs[x].qlen + q->depth; | |
339 | + | |
340 | + p = d; | |
341 | + n = q->dep[d].next; | |
342 | + q->dep[x].next = n; | |
343 | + q->dep[x].prev = p; | |
344 | + q->dep[p].next = q->dep[n].prev = x; | |
345 | +} | |
346 | + | |
347 | +static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) | |
348 | +{ | |
349 | + esfq_index p, n; | |
350 | + | |
351 | + n = q->dep[x].next; | |
352 | + p = q->dep[x].prev; | |
353 | + q->dep[p].next = n; | |
354 | + q->dep[n].prev = p; | |
355 | + | |
356 | + if (n == p && q->max_depth == q->qs[x].qlen + 1) | |
357 | + q->max_depth--; | |
358 | + | |
359 | + esfq_link(q, x); | |
360 | +} | |
361 | + | |
362 | +static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) | |
363 | +{ | |
364 | + esfq_index p, n; | |
365 | + int d; | |
366 | + | |
367 | + n = q->dep[x].next; | |
368 | + p = q->dep[x].prev; | |
369 | + q->dep[p].next = n; | |
370 | + q->dep[n].prev = p; | |
371 | + d = q->qs[x].qlen; | |
372 | + if (q->max_depth < d) | |
373 | + q->max_depth = d; | |
374 | + | |
375 | + esfq_link(q, x); | |
376 | +} | |
377 | + | |
378 | +static unsigned int esfq_drop(struct Qdisc *sch) | |
379 | +{ | |
380 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
381 | + esfq_index d = q->max_depth; | |
382 | + struct sk_buff *skb; | |
383 | + unsigned int len; | |
384 | + | |
385 | + /* Queue is full! Find the longest slot and | |
386 | + drop a packet from it */ | |
387 | + | |
388 | + if (d > 1) { | |
389 | + esfq_index x = q->dep[d+q->depth].next; | |
390 | + skb = q->qs[x].prev; | |
391 | + len = skb->len; | |
392 | + __skb_unlink(skb, &q->qs[x]); | |
393 | + kfree_skb(skb); | |
394 | + esfq_dec(q, x); | |
395 | + sch->q.qlen--; | |
396 | + sch->qstats.drops++; | |
397 | + sch->qstats.backlog -= len; | |
398 | + return len; | |
399 | + } | |
400 | + | |
401 | + if (d == 1) { | |
402 | + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ | |
403 | + d = q->next[q->tail]; | |
404 | + q->next[q->tail] = q->next[d]; | |
405 | + q->allot[q->next[d]] += q->quantum; | |
406 | + skb = q->qs[d].prev; | |
407 | + len = skb->len; | |
408 | + __skb_unlink(skb, &q->qs[d]); | |
409 | + kfree_skb(skb); | |
410 | + esfq_dec(q, d); | |
411 | + sch->q.qlen--; | |
412 | + q->ht[q->hash[d]] = q->depth; | |
413 | + sch->qstats.drops++; | |
414 | + sch->qstats.backlog -= len; | |
415 | + return len; | |
416 | + } | |
417 | + | |
418 | + return 0; | |
419 | +} | |
420 | + | |
421 | +static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) | |
422 | +{ | |
423 | + unsigned hash = esfq_hash(q, skb); | |
424 | + unsigned depth = q->depth; | |
425 | + esfq_index x; | |
426 | + | |
427 | + x = q->ht[hash]; | |
428 | + if (x == depth) { | |
429 | + q->ht[hash] = x = q->dep[depth].next; | |
430 | + q->hash[x] = hash; | |
431 | + } | |
432 | + | |
433 | + if (end == ESFQ_TAIL) | |
434 | + __skb_queue_tail(&q->qs[x], skb); | |
435 | + else | |
436 | + __skb_queue_head(&q->qs[x], skb); | |
437 | + | |
438 | + esfq_inc(q, x); | |
439 | + if (q->qs[x].qlen == 1) { /* The flow is new */ | |
440 | + if (q->tail == depth) { /* It is the first flow */ | |
441 | + q->tail = x; | |
442 | + q->next[x] = x; | |
443 | + q->allot[x] = q->quantum; | |
444 | + } else { | |
445 | + q->next[x] = q->next[q->tail]; | |
446 | + q->next[q->tail] = x; | |
447 | + q->tail = x; | |
448 | + } | |
449 | + } | |
450 | +} | |
451 | + | |
452 | +static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |
453 | +{ | |
454 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
455 | + esfq_q_enqueue(skb, q, ESFQ_TAIL); | |
456 | + sch->qstats.backlog += skb->len; | |
457 | + if (++sch->q.qlen < q->limit-1) { | |
458 | + sch->bstats.bytes += skb->len; | |
459 | + sch->bstats.packets++; | |
460 | + return 0; | |
461 | + } | |
462 | + | |
463 | + sch->qstats.drops++; | |
464 | + esfq_drop(sch); | |
465 | + return NET_XMIT_CN; | |
466 | +} | |
467 | + | |
921c0c8a | 468 | +static struct sk_buff *esfq_peek(struct Qdisc* sch) |
2380c486 JR |
469 | +{ |
470 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
921c0c8a | 471 | + esfq_index a; |
2380c486 | 472 | + |
921c0c8a JR |
473 | + /* No active slots */ |
474 | + if (q->tail == q->depth) | |
475 | + return NULL; | |
476 | + | |
477 | + a = q->next[q->tail]; | |
478 | + return skb_peek(&q->qs[a]); | |
2380c486 JR |
479 | +} |
480 | + | |
481 | +static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) | |
482 | +{ | |
483 | + struct sk_buff *skb; | |
484 | + unsigned depth = q->depth; | |
485 | + esfq_index a, old_a; | |
486 | + | |
487 | + /* No active slots */ | |
488 | + if (q->tail == depth) | |
489 | + return NULL; | |
490 | + | |
491 | + a = old_a = q->next[q->tail]; | |
492 | + | |
493 | + /* Grab packet */ | |
494 | + skb = __skb_dequeue(&q->qs[a]); | |
495 | + esfq_dec(q, a); | |
496 | + | |
497 | + /* Is the slot empty? */ | |
498 | + if (q->qs[a].qlen == 0) { | |
499 | + q->ht[q->hash[a]] = depth; | |
500 | + a = q->next[a]; | |
501 | + if (a == old_a) { | |
502 | + q->tail = depth; | |
503 | + return skb; | |
504 | + } | |
505 | + q->next[q->tail] = a; | |
506 | + q->allot[a] += q->quantum; | |
507 | + } else if ((q->allot[a] -= skb->len) <= 0) { | |
508 | + q->tail = a; | |
509 | + a = q->next[a]; | |
510 | + q->allot[a] += q->quantum; | |
511 | + } | |
512 | + | |
513 | + return skb; | |
514 | +} | |
515 | + | |
516 | +static struct sk_buff *esfq_dequeue(struct Qdisc* sch) | |
517 | +{ | |
518 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
519 | + struct sk_buff *skb; | |
520 | + | |
521 | + skb = esfq_q_dequeue(q); | |
522 | + if (skb == NULL) | |
523 | + return NULL; | |
524 | + sch->q.qlen--; | |
525 | + sch->qstats.backlog -= skb->len; | |
526 | + return skb; | |
527 | +} | |
528 | + | |
529 | +static void esfq_q_destroy(struct esfq_sched_data *q) | |
530 | +{ | |
531 | + del_timer(&q->perturb_timer); | |
532 | + if(q->ht) | |
533 | + kfree(q->ht); | |
534 | + if(q->dep) | |
535 | + kfree(q->dep); | |
536 | + if(q->next) | |
537 | + kfree(q->next); | |
538 | + if(q->allot) | |
539 | + kfree(q->allot); | |
540 | + if(q->hash) | |
541 | + kfree(q->hash); | |
542 | + if(q->qs) | |
543 | + kfree(q->qs); | |
544 | +} | |
545 | + | |
546 | +static void esfq_destroy(struct Qdisc *sch) | |
547 | +{ | |
548 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
549 | + esfq_q_destroy(q); | |
550 | +} | |
551 | + | |
552 | + | |
553 | +static void esfq_reset(struct Qdisc* sch) | |
554 | +{ | |
555 | + struct sk_buff *skb; | |
556 | + | |
557 | + while ((skb = esfq_dequeue(sch)) != NULL) | |
558 | + kfree_skb(skb); | |
559 | +} | |
560 | + | |
561 | +static void esfq_perturbation(unsigned long arg) | |
562 | +{ | |
563 | + struct Qdisc *sch = (struct Qdisc*)arg; | |
564 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
565 | + | |
34e21f5f | 566 | + q->perturbation = prandom_u32() & 0x1F; |
2380c486 JR |
567 | + |
568 | + if (q->perturb_period) { | |
569 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
570 | + add_timer(&q->perturb_timer); | |
571 | + } | |
572 | +} | |
573 | + | |
574 | +static unsigned int esfq_check_hash(unsigned int kind) | |
575 | +{ | |
576 | + switch (kind) { | |
577 | + case TCA_ESFQ_HASH_CTORIGDST: | |
578 | + case TCA_ESFQ_HASH_CTORIGSRC: | |
579 | + case TCA_ESFQ_HASH_CTREPLDST: | |
580 | + case TCA_ESFQ_HASH_CTREPLSRC: | |
581 | + case TCA_ESFQ_HASH_CTNATCHG: | |
582 | +#ifndef CONFIG_NET_SCH_ESFQ_NFCT | |
583 | + { | |
584 | + if (net_ratelimit()) | |
585 | + printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); | |
586 | + return TCA_ESFQ_HASH_CLASSIC; | |
587 | + } | |
588 | +#endif | |
589 | + case TCA_ESFQ_HASH_CLASSIC: | |
590 | + case TCA_ESFQ_HASH_DST: | |
591 | + case TCA_ESFQ_HASH_SRC: | |
592 | + case TCA_ESFQ_HASH_FWMARK: | |
593 | + return kind; | |
594 | + default: | |
595 | + { | |
596 | + if (net_ratelimit()) | |
597 | + printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); | |
598 | + return TCA_ESFQ_HASH_CLASSIC; | |
599 | + } | |
600 | + } | |
601 | +} | |
602 | + | |
603 | +static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) | |
604 | +{ | |
605 | + struct tc_esfq_qopt *ctl = nla_data(opt); | |
606 | + esfq_index p = ~0U/2; | |
607 | + int i; | |
608 | + | |
609 | + if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) | |
610 | + return -EINVAL; | |
611 | + | |
612 | + q->perturbation = 0; | |
613 | + q->hash_kind = TCA_ESFQ_HASH_CLASSIC; | |
614 | + q->max_depth = 0; | |
615 | + if (opt == NULL) { | |
616 | + q->perturb_period = 0; | |
617 | + q->hash_divisor = 1024; | |
618 | + q->tail = q->limit = q->depth = 128; | |
619 | + | |
620 | + } else { | |
621 | + struct tc_esfq_qopt *ctl = nla_data(opt); | |
622 | + if (ctl->quantum) | |
623 | + q->quantum = ctl->quantum; | |
624 | + q->perturb_period = ctl->perturb_period*HZ; | |
625 | + q->hash_divisor = ctl->divisor ? : 1024; | |
626 | + q->tail = q->limit = q->depth = ctl->flows ? : 128; | |
627 | + | |
628 | + if ( q->depth > p - 1 ) | |
629 | + return -EINVAL; | |
630 | + | |
631 | + if (ctl->limit) | |
632 | + q->limit = min_t(u32, ctl->limit, q->depth); | |
633 | + | |
634 | + if (ctl->hash_kind) { | |
635 | + q->hash_kind = esfq_check_hash(ctl->hash_kind); | |
636 | + } | |
637 | + } | |
638 | + | |
639 | + q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); | |
640 | + if (!q->ht) | |
641 | + goto err_case; | |
642 | + q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); | |
643 | + if (!q->dep) | |
644 | + goto err_case; | |
645 | + q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); | |
646 | + if (!q->next) | |
647 | + goto err_case; | |
648 | + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); | |
649 | + if (!q->allot) | |
650 | + goto err_case; | |
651 | + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); | |
652 | + if (!q->hash) | |
653 | + goto err_case; | |
654 | + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); | |
655 | + if (!q->qs) | |
656 | + goto err_case; | |
657 | + | |
658 | + for (i=0; i< q->hash_divisor; i++) | |
659 | + q->ht[i] = q->depth; | |
660 | + for (i=0; i<q->depth; i++) { | |
661 | + skb_queue_head_init(&q->qs[i]); | |
662 | + q->dep[i+q->depth].next = i+q->depth; | |
663 | + q->dep[i+q->depth].prev = i+q->depth; | |
664 | + } | |
665 | + | |
666 | + for (i=0; i<q->depth; i++) | |
667 | + esfq_link(q, i); | |
668 | + return 0; | |
669 | +err_case: | |
670 | + esfq_q_destroy(q); | |
671 | + return -ENOBUFS; | |
672 | +} | |
673 | + | |
674 | +static int esfq_init(struct Qdisc *sch, struct nlattr *opt) | |
675 | +{ | |
676 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
677 | + int err; | |
678 | + | |
679 | + q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ | |
680 | + if ((err = esfq_q_init(q, opt))) | |
681 | + return err; | |
682 | + | |
683 | + init_timer(&q->perturb_timer); | |
684 | + q->perturb_timer.data = (unsigned long)sch; | |
685 | + q->perturb_timer.function = esfq_perturbation; | |
686 | + if (q->perturb_period) { | |
687 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
688 | + add_timer(&q->perturb_timer); | |
689 | + } | |
690 | + | |
691 | + return 0; | |
692 | +} | |
693 | + | |
694 | +static int esfq_change(struct Qdisc *sch, struct nlattr *opt) | |
695 | +{ | |
696 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
697 | + struct esfq_sched_data new; | |
698 | + struct sk_buff *skb; | |
699 | + int err; | |
700 | + | |
701 | + /* set up new queue */ | |
702 | + memset(&new, 0, sizeof(struct esfq_sched_data)); | |
703 | + new.quantum = psched_mtu(qdisc_dev(sch)); /* default */ | |
704 | + if ((err = esfq_q_init(&new, opt))) | |
705 | + return err; | |
706 | + | |
707 | + /* copy all packets from the old queue to the new queue */ | |
708 | + sch_tree_lock(sch); | |
709 | + while ((skb = esfq_q_dequeue(q)) != NULL) | |
710 | + esfq_q_enqueue(skb, &new, ESFQ_TAIL); | |
711 | + | |
712 | + /* clean up the old queue */ | |
713 | + esfq_q_destroy(q); | |
714 | + | |
715 | + /* copy elements of the new queue into the old queue */ | |
716 | + q->perturb_period = new.perturb_period; | |
717 | + q->quantum = new.quantum; | |
718 | + q->limit = new.limit; | |
719 | + q->depth = new.depth; | |
720 | + q->hash_divisor = new.hash_divisor; | |
721 | + q->hash_kind = new.hash_kind; | |
722 | + q->tail = new.tail; | |
723 | + q->max_depth = new.max_depth; | |
724 | + q->ht = new.ht; | |
725 | + q->dep = new.dep; | |
726 | + q->next = new.next; | |
727 | + q->allot = new.allot; | |
728 | + q->hash = new.hash; | |
729 | + q->qs = new.qs; | |
730 | + | |
731 | + /* finish up */ | |
732 | + if (q->perturb_period) { | |
733 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
734 | + add_timer(&q->perturb_timer); | |
735 | + } else { | |
736 | + q->perturbation = 0; | |
737 | + } | |
738 | + sch_tree_unlock(sch); | |
739 | + return 0; | |
740 | +} | |
741 | + | |
742 | +static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) | |
743 | +{ | |
744 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
745 | + unsigned char *b = skb_tail_pointer(skb); | |
746 | + struct tc_esfq_qopt opt; | |
747 | + | |
748 | + opt.quantum = q->quantum; | |
749 | + opt.perturb_period = q->perturb_period/HZ; | |
750 | + | |
751 | + opt.limit = q->limit; | |
752 | + opt.divisor = q->hash_divisor; | |
753 | + opt.flows = q->depth; | |
754 | + opt.hash_kind = q->hash_kind; | |
755 | + | |
0c3ec466 AM |
756 | + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) |
757 | + goto nla_put_failure; | |
2380c486 JR |
758 | + return skb->len; |
759 | + | |
760 | +nla_put_failure: | |
761 | + nlmsg_trim(skb, b); | |
762 | + return -1; | |
763 | +} | |
764 | + | |
765 | +static struct Qdisc_ops esfq_qdisc_ops = | |
766 | +{ | |
767 | + .next = NULL, | |
768 | + .cl_ops = NULL, | |
769 | + .id = "esfq", | |
770 | + .priv_size = sizeof(struct esfq_sched_data), | |
771 | + .enqueue = esfq_enqueue, | |
772 | + .dequeue = esfq_dequeue, | |
921c0c8a | 773 | + .peek = esfq_peek, |
2380c486 JR |
774 | + .drop = esfq_drop, |
775 | + .init = esfq_init, | |
776 | + .reset = esfq_reset, | |
777 | + .destroy = esfq_destroy, | |
778 | + .change = esfq_change, | |
779 | + .dump = esfq_dump, | |
780 | + .owner = THIS_MODULE, | |
781 | +}; | |
782 | + | |
783 | +static int __init esfq_module_init(void) | |
784 | +{ | |
785 | + return register_qdisc(&esfq_qdisc_ops); | |
786 | +} | |
787 | +static void __exit esfq_module_exit(void) | |
788 | +{ | |
789 | + unregister_qdisc(&esfq_qdisc_ops); | |
790 | +} | |
791 | +module_init(esfq_module_init) | |
792 | +module_exit(esfq_module_exit) | |
793 | +MODULE_LICENSE("GPL"); |