]>
Commit | Line | Data |
---|---|---|
1f3e3010 | 1 | diff -Naur linux-2.6.24.orig/include/linux/pkt_sched.h linux-2.6.24/include/linux/pkt_sched.h |
2 | --- linux-2.6.24.orig/include/linux/pkt_sched.h 2008-01-24 14:58:37.000000000 -0800 | |
3 | +++ linux-2.6.24/include/linux/pkt_sched.h 2008-01-28 00:27:12.000000000 -0800 | |
4 | @@ -157,6 +157,33 @@ | |
7a5b0711 | 5 | * to change these parameters in compile time. |
7a5b0711 PS |
6 | */ |
7 | ||
8 | +/* ESFQ section */ | |
9 | + | |
10 | +enum | |
11 | +{ | |
12 | + /* traditional */ | |
1f3e3010 | 13 | + TCA_ESFQ_HASH_CLASSIC, |
14 | + TCA_ESFQ_HASH_DST, | |
15 | + TCA_ESFQ_HASH_SRC, | |
16 | + TCA_ESFQ_HASH_FWMARK, | |
9df9eb23 | 17 | + /* conntrack */ |
1f3e3010 | 18 | + TCA_ESFQ_HASH_CTORIGDST, |
19 | + TCA_ESFQ_HASH_CTORIGSRC, | |
20 | + TCA_ESFQ_HASH_CTREPLDST, | |
21 | + TCA_ESFQ_HASH_CTREPLSRC, | |
22 | + TCA_ESFQ_HASH_CTNATCHG, | |
7a5b0711 PS |
23 | +}; |
24 | + | |
25 | +struct tc_esfq_qopt | |
26 | +{ | |
27 | + unsigned quantum; /* Bytes per round allocated to flow */ | |
28 | + int perturb_period; /* Period of hash perturbation */ | |
29 | + __u32 limit; /* Maximal packets in queue */ | |
30 | + unsigned divisor; /* Hash divisor */ | |
31 | + unsigned flows; /* Maximal number of flows */ | |
32 | + unsigned hash_kind; /* Hash function to use for flow identification */ | |
33 | +}; | |
34 | + | |
35 | /* RED section */ | |
36 | ||
37 | enum | |
1f3e3010 | 38 | diff -Naur linux-2.6.24.orig/net/sched/Kconfig linux-2.6.24/net/sched/Kconfig |
39 | --- linux-2.6.24.orig/net/sched/Kconfig 2008-01-24 14:58:37.000000000 -0800 | |
40 | +++ linux-2.6.24/net/sched/Kconfig 2008-01-28 00:27:12.000000000 -0800 | |
41 | @@ -139,6 +139,37 @@ | |
7a5b0711 PS |
42 | To compile this code as a module, choose M here: the |
43 | module will be called sch_sfq. | |
44 | ||
45 | +config NET_SCH_ESFQ | |
9df9eb23 | 46 | + tristate "Enhanced Stochastic Fairness Queueing (ESFQ)" |
7a5b0711 PS |
47 | + ---help--- |
48 | + Say Y here if you want to use the Enhanced Stochastic Fairness | |
49 | + Queueing (ESFQ) packet scheduling algorithm for some of your network | |
50 | + devices or as a leaf discipline for a classful qdisc such as HTB or | |
51 | + CBQ (see the top of <file:net/sched/sch_esfq.c> for details and | |
52 | + references to the SFQ algorithm). | |
9df9eb23 | 53 | + |
7a5b0711 | 54 | + This is an enchanced SFQ version which allows you to control some |
9df9eb23 | 55 | + hardcoded values in the SFQ scheduler. |
56 | + | |
57 | + ESFQ also adds control of the hash function used to identify packet | |
58 | + flows. The original SFQ discipline hashes by connection; ESFQ add | |
59 | + several other hashing methods, such as by src IP or by dst IP, which | |
60 | + can be more fair to users in some networking situations. | |
7a5b0711 PS |
61 | + |
62 | + To compile this code as a module, choose M here: the | |
63 | + module will be called sch_esfq. | |
9df9eb23 | 64 | + |
65 | +config NET_SCH_ESFQ_NFCT | |
66 | + bool "Connection Tracking Hash Types" | |
1f3e3010 | 67 | + depends on NET_SCH_ESFQ && NF_CONNTRACK_ENABLED |
9df9eb23 | 68 | + ---help--- |
69 | + Say Y here to enable support for hashing based on netfilter connection | |
70 | + tracking information. This is useful for a router that is also using | |
71 | + NAT to connect privately-addressed hosts to the Internet. If you want | |
72 | + to provide fair distribution of upstream bandwidth, ESFQ must use | |
73 | + connection tracking information, since all outgoing packets will share | |
74 | + the same source address. | |
7a5b0711 PS |
75 | + |
76 | config NET_SCH_TEQL | |
9df9eb23 | 77 | tristate "True Link Equalizer (TEQL)" |
78 | ---help--- | |
1f3e3010 | 79 | diff -Naur linux-2.6.24.orig/net/sched/Makefile linux-2.6.24/net/sched/Makefile |
80 | --- linux-2.6.24.orig/net/sched/Makefile 2008-01-24 14:58:37.000000000 -0800 | |
81 | +++ linux-2.6.24/net/sched/Makefile 2008-01-28 00:27:12.000000000 -0800 | |
7a5b0711 PS |
82 | @@ -23,6 +23,7 @@ |
83 | obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o | |
84 | obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o | |
85 | obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o | |
86 | +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o | |
87 | obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o | |
88 | obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o | |
89 | obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o | |
1f3e3010 | 90 | diff -Naur linux-2.6.24.orig/net/sched/sch_esfq.c linux-2.6.24/net/sched/sch_esfq.c |
91 | --- linux-2.6.24.orig/net/sched/sch_esfq.c 1969-12-31 16:00:00.000000000 -0800 | |
92 | +++ linux-2.6.24/net/sched/sch_esfq.c 2008-01-28 00:27:22.000000000 -0800 | |
93 | @@ -0,0 +1,703 @@ | |
7a5b0711 PS |
94 | +/* |
95 | + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. | |
96 | + * | |
97 | + * This program is free software; you can redistribute it and/or | |
98 | + * modify it under the terms of the GNU General Public License | |
99 | + * as published by the Free Software Foundation; either version | |
100 | + * 2 of the License, or (at your option) any later version. | |
101 | + * | |
102 | + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | |
103 | + * | |
104 | + * Changes: Alexander Atanasov, <alex@ssi.bg> | |
105 | + * Added dynamic depth,limit,divisor,hash_kind options. | |
106 | + * Added dst and src hashes. | |
107 | + * | |
108 | + * Alexander Clouter, <alex@digriz.org.uk> | |
109 | + * Ported ESFQ to Linux 2.6. | |
110 | + * | |
111 | + * Corey Hickey, <bugfood-c@fatooh.org> | |
112 | + * Maintenance of the Linux 2.6 port. | |
9df9eb23 | 113 | + * Added fwmark hash (thanks to Robert Kurjata). |
114 | + * Added usage of jhash. | |
115 | + * Added conntrack support. | |
116 | + * Added ctnatchg hash (thanks to Ben Pfountz). | |
7a5b0711 PS |
117 | + */ |
118 | + | |
7a5b0711 PS |
119 | +#include <linux/module.h> |
120 | +#include <asm/uaccess.h> | |
121 | +#include <asm/system.h> | |
122 | +#include <linux/bitops.h> | |
123 | +#include <linux/types.h> | |
124 | +#include <linux/kernel.h> | |
125 | +#include <linux/jiffies.h> | |
126 | +#include <linux/string.h> | |
127 | +#include <linux/mm.h> | |
128 | +#include <linux/socket.h> | |
129 | +#include <linux/sockios.h> | |
130 | +#include <linux/in.h> | |
131 | +#include <linux/errno.h> | |
132 | +#include <linux/interrupt.h> | |
133 | +#include <linux/if_ether.h> | |
134 | +#include <linux/inet.h> | |
135 | +#include <linux/netdevice.h> | |
136 | +#include <linux/etherdevice.h> | |
137 | +#include <linux/notifier.h> | |
138 | +#include <linux/init.h> | |
139 | +#include <net/ip.h> | |
1f3e3010 | 140 | +#include <net/netlink.h> |
7a5b0711 PS |
141 | +#include <linux/ipv6.h> |
142 | +#include <net/route.h> | |
143 | +#include <linux/skbuff.h> | |
144 | +#include <net/sock.h> | |
145 | +#include <net/pkt_sched.h> | |
9df9eb23 | 146 | +#include <linux/jhash.h> |
147 | +#include <net/netfilter/nf_conntrack.h> | |
7a5b0711 PS |
148 | + |
149 | +/* Stochastic Fairness Queuing algorithm. | |
150 | + For more comments look at sch_sfq.c. | |
151 | + The difference is that you can change limit, depth, | |
9df9eb23 | 152 | + hash table size and choose alternate hash types. |
7a5b0711 PS |
153 | + |
154 | + classic: same as in sch_sfq.c | |
155 | + dst: destination IP address | |
156 | + src: source IP address | |
9df9eb23 | 157 | + fwmark: netfilter mark value |
158 | + ctorigdst: original destination IP address | |
159 | + ctorigsrc: original source IP address | |
160 | + ctrepldst: reply destination IP address | |
161 | + ctreplsrc: reply source IP | |
7a5b0711 | 162 | + |
7a5b0711 PS |
163 | +*/ |
164 | + | |
9df9eb23 | 165 | +#define ESFQ_HEAD 0 |
166 | +#define ESFQ_TAIL 1 | |
7a5b0711 PS |
167 | + |
168 | +/* This type should contain at least SFQ_DEPTH*2 values */ | |
169 | +typedef unsigned int esfq_index; | |
170 | + | |
171 | +struct esfq_head | |
172 | +{ | |
173 | + esfq_index next; | |
174 | + esfq_index prev; | |
175 | +}; | |
176 | + | |
177 | +struct esfq_sched_data | |
178 | +{ | |
179 | +/* Parameters */ | |
180 | + int perturb_period; | |
181 | + unsigned quantum; /* Allotment per round: MUST BE >= MTU */ | |
182 | + int limit; | |
183 | + unsigned depth; | |
184 | + unsigned hash_divisor; | |
185 | + unsigned hash_kind; | |
186 | +/* Variables */ | |
187 | + struct timer_list perturb_timer; | |
188 | + int perturbation; | |
189 | + esfq_index tail; /* Index of current slot in round */ | |
190 | + esfq_index max_depth; /* Maximal depth */ | |
191 | + | |
192 | + esfq_index *ht; /* Hash table */ | |
193 | + esfq_index *next; /* Active slots link */ | |
194 | + short *allot; /* Current allotment per slot */ | |
195 | + unsigned short *hash; /* Hash value indexed by slots */ | |
196 | + struct sk_buff_head *qs; /* Slot queue */ | |
197 | + struct esfq_head *dep; /* Linked list of slots, indexed by depth */ | |
7a5b0711 PS |
198 | +}; |
199 | + | |
9df9eb23 | 200 | +/* This contains the info we will hash. */ |
201 | +struct esfq_packet_info | |
7a5b0711 | 202 | +{ |
9df9eb23 | 203 | + u32 proto; /* protocol or port */ |
204 | + u32 src; /* source from packet header */ | |
205 | + u32 dst; /* destination from packet header */ | |
206 | + u32 ctorigsrc; /* original source from conntrack */ | |
207 | + u32 ctorigdst; /* original destination from conntrack */ | |
208 | + u32 ctreplsrc; /* reply source from conntrack */ | |
209 | + u32 ctrepldst; /* reply destination from conntrack */ | |
210 | + u32 mark; /* netfilter mark (fwmark) */ | |
211 | +}; | |
7a5b0711 | 212 | + |
9df9eb23 | 213 | +static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a) |
214 | +{ | |
215 | + return jhash_1word(a, q->perturbation) & (q->hash_divisor-1); | |
7a5b0711 PS |
216 | +} |
217 | + | |
9df9eb23 | 218 | +static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b) |
7a5b0711 | 219 | +{ |
9df9eb23 | 220 | + return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1); |
7a5b0711 PS |
221 | +} |
222 | + | |
9df9eb23 | 223 | +static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c) |
7a5b0711 | 224 | +{ |
9df9eb23 | 225 | + return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1); |
7a5b0711 PS |
226 | +} |
227 | + | |
228 | +static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) | |
229 | +{ | |
9df9eb23 | 230 | + struct esfq_packet_info info; |
231 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
232 | + enum ip_conntrack_info ctinfo; | |
233 | + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | |
234 | +#endif | |
235 | + | |
7a5b0711 PS |
236 | + switch (skb->protocol) { |
237 | + case __constant_htons(ETH_P_IP): | |
238 | + { | |
1f3e3010 | 239 | + struct iphdr *iph = ip_hdr(skb); |
9df9eb23 | 240 | + info.dst = iph->daddr; |
241 | + info.src = iph->saddr; | |
7a5b0711 PS |
242 | + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && |
243 | + (iph->protocol == IPPROTO_TCP || | |
244 | + iph->protocol == IPPROTO_UDP || | |
9df9eb23 | 245 | + iph->protocol == IPPROTO_SCTP || |
246 | + iph->protocol == IPPROTO_DCCP || | |
7a5b0711 | 247 | + iph->protocol == IPPROTO_ESP)) |
9df9eb23 | 248 | + info.proto = *(((u32*)iph) + iph->ihl); |
249 | + else | |
250 | + info.proto = iph->protocol; | |
7a5b0711 PS |
251 | + break; |
252 | + } | |
253 | + case __constant_htons(ETH_P_IPV6): | |
254 | + { | |
1f3e3010 | 255 | + struct ipv6hdr *iph = ipv6_hdr(skb); |
9df9eb23 | 256 | + /* Hash ipv6 addresses into a u32. This isn't ideal, |
257 | + * but the code is simple. */ | |
258 | + info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation); | |
259 | + info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation); | |
7a5b0711 PS |
260 | + if (iph->nexthdr == IPPROTO_TCP || |
261 | + iph->nexthdr == IPPROTO_UDP || | |
9df9eb23 | 262 | + iph->nexthdr == IPPROTO_SCTP || |
263 | + iph->nexthdr == IPPROTO_DCCP || | |
7a5b0711 | 264 | + iph->nexthdr == IPPROTO_ESP) |
9df9eb23 | 265 | + info.proto = *(u32*)&iph[1]; |
266 | + else | |
267 | + info.proto = iph->nexthdr; | |
7a5b0711 PS |
268 | + break; |
269 | + } | |
270 | + default: | |
9df9eb23 | 271 | + info.dst = (u32)(unsigned long)skb->dst; |
272 | + info.src = (u32)(unsigned long)skb->sk; | |
273 | + info.proto = skb->protocol; | |
7a5b0711 | 274 | + } |
9df9eb23 | 275 | + |
276 | + info.mark = skb->mark; | |
277 | + | |
278 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
279 | + /* defaults if there is no conntrack info */ | |
280 | + info.ctorigsrc = info.src; | |
281 | + info.ctorigdst = info.dst; | |
282 | + info.ctreplsrc = info.dst; | |
283 | + info.ctrepldst = info.src; | |
284 | + /* collect conntrack info */ | |
285 | + if (ct && ct != &nf_conntrack_untracked) { | |
286 | + if (skb->protocol == __constant_htons(ETH_P_IP)) { | |
287 | + info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; | |
288 | + info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip; | |
289 | + info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip; | |
290 | + info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip; | |
291 | + } | |
292 | + else if (skb->protocol == __constant_htons(ETH_P_IPV6)) { | |
293 | + /* Again, hash ipv6 addresses into a single u32. */ | |
294 | + info.ctorigsrc = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6, 4, q->perturbation); | |
295 | + info.ctorigdst = jhash2(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6, 4, q->perturbation); | |
296 | + info.ctreplsrc = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6, 4, q->perturbation); | |
297 | + info.ctrepldst = jhash2(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6, 4, q->perturbation); | |
298 | + } | |
299 | + | |
300 | + } | |
301 | +#endif | |
302 | + | |
303 | + switch(q->hash_kind) { | |
1f3e3010 | 304 | + case TCA_ESFQ_HASH_CLASSIC: |
9df9eb23 | 305 | + return esfq_jhash_3words(q, info.dst, info.src, info.proto); |
1f3e3010 | 306 | + case TCA_ESFQ_HASH_DST: |
9df9eb23 | 307 | + return esfq_jhash_1word(q, info.dst); |
1f3e3010 | 308 | + case TCA_ESFQ_HASH_SRC: |
9df9eb23 | 309 | + return esfq_jhash_1word(q, info.src); |
1f3e3010 | 310 | + case TCA_ESFQ_HASH_FWMARK: |
9df9eb23 | 311 | + return esfq_jhash_1word(q, info.mark); |
312 | +#ifdef CONFIG_NET_SCH_ESFQ_NFCT | |
1f3e3010 | 313 | + case TCA_ESFQ_HASH_CTORIGDST: |
9df9eb23 | 314 | + return esfq_jhash_1word(q, info.ctorigdst); |
1f3e3010 | 315 | + case TCA_ESFQ_HASH_CTORIGSRC: |
9df9eb23 | 316 | + return esfq_jhash_1word(q, info.ctorigsrc); |
1f3e3010 | 317 | + case TCA_ESFQ_HASH_CTREPLDST: |
9df9eb23 | 318 | + return esfq_jhash_1word(q, info.ctrepldst); |
1f3e3010 | 319 | + case TCA_ESFQ_HASH_CTREPLSRC: |
9df9eb23 | 320 | + return esfq_jhash_1word(q, info.ctreplsrc); |
1f3e3010 | 321 | + case TCA_ESFQ_HASH_CTNATCHG: |
9df9eb23 | 322 | + { |
323 | + if (info.ctorigdst == info.ctreplsrc) | |
324 | + return esfq_jhash_1word(q, info.ctorigsrc); | |
325 | + return esfq_jhash_1word(q, info.ctreplsrc); | |
326 | + } | |
7a5b0711 PS |
327 | +#endif |
328 | + default: | |
329 | + if (net_ratelimit()) | |
330 | + printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n"); | |
331 | + } | |
9df9eb23 | 332 | + return esfq_jhash_3words(q, info.dst, info.src, info.proto); |
7a5b0711 PS |
333 | +} |
334 | + | |
335 | +static inline void esfq_link(struct esfq_sched_data *q, esfq_index x) | |
336 | +{ | |
337 | + esfq_index p, n; | |
338 | + int d = q->qs[x].qlen + q->depth; | |
339 | + | |
340 | + p = d; | |
341 | + n = q->dep[d].next; | |
342 | + q->dep[x].next = n; | |
343 | + q->dep[x].prev = p; | |
344 | + q->dep[p].next = q->dep[n].prev = x; | |
345 | +} | |
346 | + | |
347 | +static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x) | |
348 | +{ | |
349 | + esfq_index p, n; | |
350 | + | |
351 | + n = q->dep[x].next; | |
352 | + p = q->dep[x].prev; | |
353 | + q->dep[p].next = n; | |
354 | + q->dep[n].prev = p; | |
355 | + | |
356 | + if (n == p && q->max_depth == q->qs[x].qlen + 1) | |
357 | + q->max_depth--; | |
358 | + | |
359 | + esfq_link(q, x); | |
360 | +} | |
361 | + | |
362 | +static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x) | |
363 | +{ | |
364 | + esfq_index p, n; | |
365 | + int d; | |
366 | + | |
367 | + n = q->dep[x].next; | |
368 | + p = q->dep[x].prev; | |
369 | + q->dep[p].next = n; | |
370 | + q->dep[n].prev = p; | |
371 | + d = q->qs[x].qlen; | |
372 | + if (q->max_depth < d) | |
373 | + q->max_depth = d; | |
374 | + | |
375 | + esfq_link(q, x); | |
376 | +} | |
377 | + | |
378 | +static unsigned int esfq_drop(struct Qdisc *sch) | |
379 | +{ | |
380 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
381 | + esfq_index d = q->max_depth; | |
382 | + struct sk_buff *skb; | |
383 | + unsigned int len; | |
384 | + | |
385 | + /* Queue is full! Find the longest slot and | |
386 | + drop a packet from it */ | |
387 | + | |
388 | + if (d > 1) { | |
389 | + esfq_index x = q->dep[d+q->depth].next; | |
390 | + skb = q->qs[x].prev; | |
391 | + len = skb->len; | |
392 | + __skb_unlink(skb, &q->qs[x]); | |
393 | + kfree_skb(skb); | |
394 | + esfq_dec(q, x); | |
395 | + sch->q.qlen--; | |
396 | + sch->qstats.drops++; | |
9df9eb23 | 397 | + sch->qstats.backlog -= len; |
7a5b0711 PS |
398 | + return len; |
399 | + } | |
400 | + | |
401 | + if (d == 1) { | |
402 | + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ | |
403 | + d = q->next[q->tail]; | |
404 | + q->next[q->tail] = q->next[d]; | |
405 | + q->allot[q->next[d]] += q->quantum; | |
406 | + skb = q->qs[d].prev; | |
407 | + len = skb->len; | |
408 | + __skb_unlink(skb, &q->qs[d]); | |
409 | + kfree_skb(skb); | |
410 | + esfq_dec(q, d); | |
411 | + sch->q.qlen--; | |
412 | + q->ht[q->hash[d]] = q->depth; | |
413 | + sch->qstats.drops++; | |
9df9eb23 | 414 | + sch->qstats.backlog -= len; |
7a5b0711 PS |
415 | + return len; |
416 | + } | |
417 | + | |
418 | + return 0; | |
419 | +} | |
420 | + | |
9df9eb23 | 421 | +static void esfq_q_enqueue(struct sk_buff *skb, struct esfq_sched_data *q, unsigned int end) |
7a5b0711 | 422 | +{ |
7a5b0711 PS |
423 | + unsigned hash = esfq_hash(q, skb); |
424 | + unsigned depth = q->depth; | |
425 | + esfq_index x; | |
426 | + | |
427 | + x = q->ht[hash]; | |
428 | + if (x == depth) { | |
429 | + q->ht[hash] = x = q->dep[depth].next; | |
430 | + q->hash[x] = hash; | |
431 | + } | |
9df9eb23 | 432 | + |
433 | + if (end == ESFQ_TAIL) | |
434 | + __skb_queue_tail(&q->qs[x], skb); | |
435 | + else | |
436 | + __skb_queue_head(&q->qs[x], skb); | |
437 | + | |
7a5b0711 PS |
438 | + esfq_inc(q, x); |
439 | + if (q->qs[x].qlen == 1) { /* The flow is new */ | |
440 | + if (q->tail == depth) { /* It is the first flow */ | |
441 | + q->tail = x; | |
442 | + q->next[x] = x; | |
443 | + q->allot[x] = q->quantum; | |
444 | + } else { | |
445 | + q->next[x] = q->next[q->tail]; | |
446 | + q->next[q->tail] = x; | |
447 | + q->tail = x; | |
448 | + } | |
449 | + } | |
9df9eb23 | 450 | +} |
451 | + | |
452 | +static int esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |
453 | +{ | |
454 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
455 | + esfq_q_enqueue(skb, q, ESFQ_TAIL); | |
456 | + sch->qstats.backlog += skb->len; | |
7a5b0711 PS |
457 | + if (++sch->q.qlen < q->limit-1) { |
458 | + sch->bstats.bytes += skb->len; | |
459 | + sch->bstats.packets++; | |
460 | + return 0; | |
461 | + } | |
462 | + | |
9df9eb23 | 463 | + sch->qstats.drops++; |
7a5b0711 PS |
464 | + esfq_drop(sch); |
465 | + return NET_XMIT_CN; | |
466 | +} | |
467 | + | |
9df9eb23 | 468 | + |
469 | +static int esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) | |
7a5b0711 PS |
470 | +{ |
471 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
9df9eb23 | 472 | + esfq_q_enqueue(skb, q, ESFQ_HEAD); |
473 | + sch->qstats.backlog += skb->len; | |
7a5b0711 PS |
474 | + if (++sch->q.qlen < q->limit - 1) { |
475 | + sch->qstats.requeues++; | |
476 | + return 0; | |
477 | + } | |
478 | + | |
479 | + sch->qstats.drops++; | |
480 | + esfq_drop(sch); | |
481 | + return NET_XMIT_CN; | |
482 | +} | |
483 | + | |
9df9eb23 | 484 | +static struct sk_buff *esfq_q_dequeue(struct esfq_sched_data *q) |
7a5b0711 | 485 | +{ |
7a5b0711 PS |
486 | + struct sk_buff *skb; |
487 | + unsigned depth = q->depth; | |
488 | + esfq_index a, old_a; | |
489 | + | |
490 | + /* No active slots */ | |
491 | + if (q->tail == depth) | |
492 | + return NULL; | |
493 | + | |
494 | + a = old_a = q->next[q->tail]; | |
495 | + | |
496 | + /* Grab packet */ | |
497 | + skb = __skb_dequeue(&q->qs[a]); | |
498 | + esfq_dec(q, a); | |
7a5b0711 PS |
499 | + |
500 | + /* Is the slot empty? */ | |
501 | + if (q->qs[a].qlen == 0) { | |
502 | + q->ht[q->hash[a]] = depth; | |
503 | + a = q->next[a]; | |
504 | + if (a == old_a) { | |
505 | + q->tail = depth; | |
506 | + return skb; | |
507 | + } | |
508 | + q->next[q->tail] = a; | |
509 | + q->allot[a] += q->quantum; | |
510 | + } else if ((q->allot[a] -= skb->len) <= 0) { | |
511 | + q->tail = a; | |
512 | + a = q->next[a]; | |
513 | + q->allot[a] += q->quantum; | |
514 | + } | |
515 | + | |
516 | + return skb; | |
517 | +} | |
518 | + | |
9df9eb23 | 519 | +static struct sk_buff *esfq_dequeue(struct Qdisc* sch) |
520 | +{ | |
521 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
522 | + struct sk_buff *skb; | |
523 | + | |
524 | + skb = esfq_q_dequeue(q); | |
525 | + if (skb == NULL) | |
526 | + return NULL; | |
527 | + sch->q.qlen--; | |
528 | + sch->qstats.backlog -= skb->len; | |
529 | + return skb; | |
530 | +} | |
531 | + | |
532 | +static void esfq_q_destroy(struct esfq_sched_data *q) | |
533 | +{ | |
534 | + del_timer(&q->perturb_timer); | |
535 | + if(q->ht) | |
536 | + kfree(q->ht); | |
537 | + if(q->dep) | |
538 | + kfree(q->dep); | |
539 | + if(q->next) | |
540 | + kfree(q->next); | |
541 | + if(q->allot) | |
542 | + kfree(q->allot); | |
543 | + if(q->hash) | |
544 | + kfree(q->hash); | |
545 | + if(q->qs) | |
546 | + kfree(q->qs); | |
547 | +} | |
548 | + | |
549 | +static void esfq_destroy(struct Qdisc *sch) | |
550 | +{ | |
551 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
552 | + esfq_q_destroy(q); | |
553 | +} | |
554 | + | |
555 | + | |
556 | +static void esfq_reset(struct Qdisc* sch) | |
7a5b0711 PS |
557 | +{ |
558 | + struct sk_buff *skb; | |
559 | + | |
560 | + while ((skb = esfq_dequeue(sch)) != NULL) | |
561 | + kfree_skb(skb); | |
562 | +} | |
563 | + | |
564 | +static void esfq_perturbation(unsigned long arg) | |
565 | +{ | |
566 | + struct Qdisc *sch = (struct Qdisc*)arg; | |
567 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
568 | + | |
569 | + q->perturbation = net_random()&0x1F; | |
570 | + | |
571 | + if (q->perturb_period) { | |
572 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
573 | + add_timer(&q->perturb_timer); | |
574 | + } | |
575 | +} | |
576 | + | |
9df9eb23 | 577 | +static unsigned int esfq_check_hash(unsigned int kind) |
7a5b0711 | 578 | +{ |
9df9eb23 | 579 | + switch (kind) { |
1f3e3010 | 580 | + case TCA_ESFQ_HASH_CTORIGDST: |
581 | + case TCA_ESFQ_HASH_CTORIGSRC: | |
582 | + case TCA_ESFQ_HASH_CTREPLDST: | |
583 | + case TCA_ESFQ_HASH_CTREPLSRC: | |
584 | + case TCA_ESFQ_HASH_CTNATCHG: | |
9df9eb23 | 585 | +#ifndef CONFIG_NET_SCH_ESFQ_NFCT |
586 | + { | |
587 | + if (net_ratelimit()) | |
588 | + printk(KERN_WARNING "ESFQ: Conntrack hash types disabled in kernel config. Falling back to classic.\n"); | |
1f3e3010 | 589 | + return TCA_ESFQ_HASH_CLASSIC; |
9df9eb23 | 590 | + } |
591 | +#endif | |
1f3e3010 | 592 | + case TCA_ESFQ_HASH_CLASSIC: |
593 | + case TCA_ESFQ_HASH_DST: | |
594 | + case TCA_ESFQ_HASH_SRC: | |
595 | + case TCA_ESFQ_HASH_FWMARK: | |
9df9eb23 | 596 | + return kind; |
597 | + default: | |
598 | + { | |
599 | + if (net_ratelimit()) | |
600 | + printk(KERN_WARNING "ESFQ: Unknown hash type. Falling back to classic.\n"); | |
1f3e3010 | 601 | + return TCA_ESFQ_HASH_CLASSIC; |
7a5b0711 | 602 | + } |
7a5b0711 | 603 | + } |
7a5b0711 | 604 | +} |
9df9eb23 | 605 | + |
f74bf832 | 606 | +static int esfq_q_init(struct esfq_sched_data *q, struct nlattr *opt) |
7a5b0711 | 607 | +{ |
f74bf832 | 608 | + struct tc_esfq_qopt *ctl = nla_data(opt); |
9df9eb23 | 609 | + esfq_index p = ~0U/2; |
7a5b0711 PS |
610 | + int i; |
611 | + | |
f74bf832 | 612 | + if (opt && opt->nla_len < nla_attr_size(sizeof(*ctl))) |
7a5b0711 PS |
613 | + return -EINVAL; |
614 | + | |
7a5b0711 | 615 | + q->perturbation = 0; |
1f3e3010 | 616 | + q->hash_kind = TCA_ESFQ_HASH_CLASSIC; |
7a5b0711 | 617 | + q->max_depth = 0; |
7a5b0711 | 618 | + if (opt == NULL) { |
7a5b0711 PS |
619 | + q->perturb_period = 0; |
620 | + q->hash_divisor = 1024; | |
621 | + q->tail = q->limit = q->depth = 128; | |
622 | + | |
623 | + } else { | |
f74bf832 | 624 | + struct tc_esfq_qopt *ctl = nla_data(opt); |
9df9eb23 | 625 | + if (ctl->quantum) |
626 | + q->quantum = ctl->quantum; | |
7a5b0711 PS |
627 | + q->perturb_period = ctl->perturb_period*HZ; |
628 | + q->hash_divisor = ctl->divisor ? : 1024; | |
629 | + q->tail = q->limit = q->depth = ctl->flows ? : 128; | |
630 | + | |
631 | + if ( q->depth > p - 1 ) | |
632 | + return -EINVAL; | |
633 | + | |
634 | + if (ctl->limit) | |
635 | + q->limit = min_t(u32, ctl->limit, q->depth); | |
636 | + | |
637 | + if (ctl->hash_kind) { | |
9df9eb23 | 638 | + q->hash_kind = esfq_check_hash(ctl->hash_kind); |
7a5b0711 PS |
639 | + } |
640 | + } | |
641 | + | |
642 | + q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL); | |
643 | + if (!q->ht) | |
644 | + goto err_case; | |
7a5b0711 PS |
645 | + q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL); |
646 | + if (!q->dep) | |
647 | + goto err_case; | |
648 | + q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL); | |
649 | + if (!q->next) | |
650 | + goto err_case; | |
7a5b0711 PS |
651 | + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL); |
652 | + if (!q->allot) | |
653 | + goto err_case; | |
654 | + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL); | |
655 | + if (!q->hash) | |
656 | + goto err_case; | |
657 | + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL); | |
658 | + if (!q->qs) | |
659 | + goto err_case; | |
660 | + | |
661 | + for (i=0; i< q->hash_divisor; i++) | |
662 | + q->ht[i] = q->depth; | |
663 | + for (i=0; i<q->depth; i++) { | |
664 | + skb_queue_head_init(&q->qs[i]); | |
665 | + q->dep[i+q->depth].next = i+q->depth; | |
666 | + q->dep[i+q->depth].prev = i+q->depth; | |
667 | + } | |
668 | + | |
669 | + for (i=0; i<q->depth; i++) | |
670 | + esfq_link(q, i); | |
671 | + return 0; | |
672 | +err_case: | |
9df9eb23 | 673 | + esfq_q_destroy(q); |
7a5b0711 PS |
674 | + return -ENOBUFS; |
675 | +} | |
676 | + | |
f74bf832 | 677 | +static int esfq_init(struct Qdisc *sch, struct nlattr *opt) |
7a5b0711 PS |
678 | +{ |
679 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
9df9eb23 | 680 | + int err; |
681 | + | |
f74bf832 | 682 | + q->quantum = psched_mtu(qdisc_dev(sch)); /* default */ |
9df9eb23 | 683 | + if ((err = esfq_q_init(q, opt))) |
684 | + return err; | |
685 | + | |
686 | + init_timer(&q->perturb_timer); | |
687 | + q->perturb_timer.data = (unsigned long)sch; | |
688 | + q->perturb_timer.function = esfq_perturbation; | |
689 | + if (q->perturb_period) { | |
690 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
691 | + add_timer(&q->perturb_timer); | |
692 | + } | |
693 | + | |
694 | + return 0; | |
695 | +} | |
696 | + | |
f74bf832 | 697 | +static int esfq_change(struct Qdisc *sch, struct nlattr *opt) |
9df9eb23 | 698 | +{ |
699 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
700 | + struct esfq_sched_data new; | |
701 | + struct sk_buff *skb; | |
702 | + int err; | |
703 | + | |
704 | + /* set up new queue */ | |
705 | + memset(&new, 0, sizeof(struct esfq_sched_data)); | |
f74bf832 | 706 | + new.quantum = psched_mtu(qdisc_dev(sch)); /* default */ |
9df9eb23 | 707 | + if ((err = esfq_q_init(&new, opt))) |
708 | + return err; | |
709 | + | |
710 | + /* copy all packets from the old queue to the new queue */ | |
711 | + sch_tree_lock(sch); | |
712 | + while ((skb = esfq_q_dequeue(q)) != NULL) | |
713 | + esfq_q_enqueue(skb, &new, ESFQ_TAIL); | |
714 | + | |
715 | + /* clean up the old queue */ | |
716 | + esfq_q_destroy(q); | |
717 | + | |
718 | + /* copy elements of the new queue into the old queue */ | |
719 | + q->perturb_period = new.perturb_period; | |
720 | + q->quantum = new.quantum; | |
721 | + q->limit = new.limit; | |
722 | + q->depth = new.depth; | |
723 | + q->hash_divisor = new.hash_divisor; | |
724 | + q->hash_kind = new.hash_kind; | |
725 | + q->tail = new.tail; | |
726 | + q->max_depth = new.max_depth; | |
727 | + q->ht = new.ht; | |
728 | + q->dep = new.dep; | |
729 | + q->next = new.next; | |
730 | + q->allot = new.allot; | |
731 | + q->hash = new.hash; | |
732 | + q->qs = new.qs; | |
733 | + | |
734 | + /* finish up */ | |
735 | + if (q->perturb_period) { | |
736 | + q->perturb_timer.expires = jiffies + q->perturb_period; | |
737 | + add_timer(&q->perturb_timer); | |
738 | + } else { | |
739 | + q->perturbation = 0; | |
740 | + } | |
741 | + sch_tree_unlock(sch); | |
742 | + return 0; | |
7a5b0711 PS |
743 | +} |
744 | + | |
745 | +static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb) | |
746 | +{ | |
747 | + struct esfq_sched_data *q = qdisc_priv(sch); | |
1f3e3010 | 748 | + unsigned char *b = skb_tail_pointer(skb); |
7a5b0711 PS |
749 | + struct tc_esfq_qopt opt; |
750 | + | |
751 | + opt.quantum = q->quantum; | |
752 | + opt.perturb_period = q->perturb_period/HZ; | |
753 | + | |
754 | + opt.limit = q->limit; | |
755 | + opt.divisor = q->hash_divisor; | |
756 | + opt.flows = q->depth; | |
757 | + opt.hash_kind = q->hash_kind; | |
758 | + | |
f74bf832 | 759 | + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
7a5b0711 PS |
760 | + |
761 | + return skb->len; | |
762 | + | |
f74bf832 | 763 | +nla_put_failure: |
1f3e3010 | 764 | + nlmsg_trim(skb, b); |
7a5b0711 PS |
765 | + return -1; |
766 | +} | |
767 | + | |
768 | +static struct Qdisc_ops esfq_qdisc_ops = | |
769 | +{ | |
770 | + .next = NULL, | |
771 | + .cl_ops = NULL, | |
772 | + .id = "esfq", | |
773 | + .priv_size = sizeof(struct esfq_sched_data), | |
774 | + .enqueue = esfq_enqueue, | |
775 | + .dequeue = esfq_dequeue, | |
776 | + .requeue = esfq_requeue, | |
777 | + .drop = esfq_drop, | |
778 | + .init = esfq_init, | |
779 | + .reset = esfq_reset, | |
780 | + .destroy = esfq_destroy, | |
9df9eb23 | 781 | + .change = esfq_change, |
7a5b0711 PS |
782 | + .dump = esfq_dump, |
783 | + .owner = THIS_MODULE, | |
784 | +}; | |
785 | + | |
786 | +static int __init esfq_module_init(void) | |
787 | +{ | |
788 | + return register_qdisc(&esfq_qdisc_ops); | |
789 | +} | |
790 | +static void __exit esfq_module_exit(void) | |
791 | +{ | |
792 | + unregister_qdisc(&esfq_qdisc_ops); | |
793 | +} | |
794 | +module_init(esfq_module_init) | |
795 | +module_exit(esfq_module_exit) | |
796 | +MODULE_LICENSE("GPL"); |