1 diff -uNr linux-2.6.4-rc1/drivers/net.orig/imq.c linux-2.6.4-rc1/drivers/net/imq.c
2 --- linux-2.6.4-rc1/drivers/net.orig/imq.c 1970-01-01 01:00:00.000000000 +0100
3 +++ linux-2.6.4-rc1/drivers/net/imq.c 2004-03-03 03:43:30.262457760 +0100
6 + * Pseudo-driver for the intermediate queue device.
8 + * This program is free software; you can redistribute it and/or
9 + * modify it under the terms of the GNU General Public License
10 + * as published by the Free Software Foundation; either version
11 + * 2 of the License, or (at your option) any later version.
13 + * Authors: Patrick McHardy, <kaber@trash.net>
15 + * The first version was written by Martin Devera, <devik@cdi.cz>
17 + * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
18 + * - Update patch to 2.4.21
19 + * Sebastian Strollo <sstrollo@nortelnetworks.com>
20 + * - Fix "Dead-loop on netdevice imq"-issue
23 +#include <linux/kernel.h>
24 +#include <linux/module.h>
25 +#include <linux/config.h>
26 +#include <linux/skbuff.h>
27 +#include <linux/netdevice.h>
28 +#include <linux/rtnetlink.h>
29 +#include <linux/if_arp.h>
30 +#include <linux/netfilter.h>
31 +#include <linux/netfilter_ipv4.h>
32 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
33 +#include <linux/netfilter_ipv6.h>
35 +#include <linux/imq.h>
36 +#include <net/pkt_sched.h>
38 +static nf_hookfn imq_nf_hook;
40 +static struct nf_hook_ops imq_ingress_ipv4 = {
41 + .hook = imq_nf_hook,
42 + .owner = THIS_MODULE,
44 + .hooknum = NF_IP_PRE_ROUTING,
45 + .priority = NF_IP_PRI_MANGLE + 1
48 +static struct nf_hook_ops imq_egress_ipv4 = {
49 + .hook = imq_nf_hook,
50 + .owner = THIS_MODULE,
52 + .hooknum = NF_IP_POST_ROUTING,
53 + .priority = NF_IP_PRI_LAST
56 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
57 +static struct nf_hook_ops imq_ingress_ipv6 = {
58 + .hook = imq_nf_hook,
59 + .owner = THIS_MODULE,
61 + .hooknum = NF_IP6_PRE_ROUTING,
62 + .priority = NF_IP6_PRI_MANGLE + 1
65 +static struct nf_hook_ops imq_egress_ipv6 = {
66 + .hook = imq_nf_hook,
67 + .owner = THIS_MODULE,
69 + .hooknum = NF_IP6_POST_ROUTING,
70 + .priority = NF_IP6_PRI_LAST
74 +static unsigned int numdevs = 2;
76 +MODULE_PARM(numdevs, "i");
77 +MODULE_PARM_DESC(numdevs, "number of imq devices");
79 +static struct net_device *imq_devs;
82 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
84 + return (struct net_device_stats *)dev->priv;
87 +/* called for packets kfree'd in qdiscs at places other than enqueue */
88 +static void imq_skb_destructor(struct sk_buff *skb)
90 + struct nf_info *info = skb->nf_info;
94 + dev_put(info->indev);
96 + dev_put(info->outdev);
101 +static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
103 + struct net_device_stats *stats = (struct net_device_stats*) dev->priv;
105 + stats->tx_bytes += skb->len;
106 + stats->tx_packets++;
108 + skb->imq_flags = 0;
109 + skb->destructor = NULL;
111 + dev->trans_start = jiffies;
112 + nf_reinject(skb, skb->nf_info, NF_ACCEPT);
116 +static int imq_nf_queue(struct sk_buff *skb, struct nf_info *info,
119 + struct net_device *dev;
120 + struct net_device_stats *stats;
121 + struct sk_buff *skb2 = NULL;
123 + unsigned int index = skb->imq_flags&IMQ_F_IFMASK;
126 + if (index > numdevs)
129 + dev = imq_devs + index;
130 + if (!(dev->flags & IFF_UP)) {
131 + skb->imq_flags = 0;
132 + nf_reinject(skb, info, NF_ACCEPT);
135 + dev->last_rx = jiffies;
137 + if (skb->destructor) {
139 + skb = skb_clone(skb, GFP_ATOMIC);
143 + skb->nf_info = info;
145 + stats = (struct net_device_stats *)dev->priv;
146 + stats->rx_bytes+= skb->len;
147 + stats->rx_packets++;
149 + spin_lock_bh(&dev->queue_lock);
152 + q->enqueue(skb_get(skb), q);
153 + if (skb_shared(skb)) {
154 + skb->destructor = imq_skb_destructor;
159 + if (spin_is_locked(&dev->xmit_lock))
160 + netif_schedule(dev);
162 + while (!netif_queue_stopped(dev) && (qdisc_restart(dev) < 0));
163 + spin_unlock_bh(&dev->queue_lock);
166 + kfree_skb(ret ? skb : skb2);
171 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff **pskb,
172 + const struct net_device *indev,
173 + const struct net_device *outdev,
174 + int (*okfn)(struct sk_buff *))
176 + if ((*pskb)->imq_flags & IMQ_F_ENQUEUE)
183 +static int __init imq_init_hooks(void)
187 + if ((err = nf_register_queue_handler(PF_INET, imq_nf_queue, NULL)))
189 + if ((err = nf_register_hook(&imq_ingress_ipv4)))
191 + if ((err = nf_register_hook(&imq_egress_ipv4)))
193 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
194 + if ((err = nf_register_queue_handler(PF_INET6, imq_nf_queue, NULL)))
196 + if ((err = nf_register_hook(&imq_ingress_ipv6)))
198 + if ((err = nf_register_hook(&imq_egress_ipv6)))
204 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
206 + nf_unregister_hook(&imq_ingress_ipv6);
208 + nf_unregister_queue_handler(PF_INET6);
210 + nf_unregister_hook(&imq_egress_ipv4);
213 + nf_unregister_hook(&imq_ingress_ipv4);
215 + nf_unregister_queue_handler(PF_INET);
220 +static void __exit imq_unhook(void)
222 + nf_unregister_hook(&imq_ingress_ipv4);
223 + nf_unregister_hook(&imq_egress_ipv4);
224 + nf_unregister_queue_handler(PF_INET);
225 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
226 + nf_unregister_hook(&imq_ingress_ipv6);
227 + nf_unregister_hook(&imq_egress_ipv6);
228 + nf_unregister_queue_handler(PF_INET6);
232 +static int __init imq_dev_init(struct net_device *dev)
234 + dev->hard_start_xmit = imq_dev_xmit;
235 + dev->type = ARPHRD_VOID;
237 + dev->tx_queue_len = 30;
238 + dev->flags = IFF_NOARP;
239 + dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
240 + if (dev->priv == NULL)
242 + memset(dev->priv, 0, sizeof(struct net_device_stats));
243 + dev->get_stats = imq_get_stats;
248 +static void imq_dev_uninit(struct net_device *dev)
253 +static int __init imq_init_devs(void)
255 + struct net_device *dev;
258 + if (!numdevs || numdevs > IMQ_MAX_DEVS) {
259 + printk(KERN_ERR "numdevs has to be betweed 1 and %u\n",
264 + imq_devs = kmalloc(sizeof(struct net_device) * numdevs, GFP_KERNEL);
267 + memset(imq_devs, 0, sizeof(struct net_device) * numdevs);
269 + /* we start counting at zero */
272 + for (i = 0, dev = imq_devs; i <= numdevs; i++, dev++) {
273 + SET_MODULE_OWNER(dev);
274 + strcpy(dev->name, "imq%d");
275 + dev->init = imq_dev_init;
276 + dev->uninit = imq_dev_uninit;
278 + if (register_netdev(dev) < 0)
285 + unregister_netdev(--dev);
290 +static void imq_cleanup_devs(void)
293 + struct net_device *dev = imq_devs;
295 + for (i = 0; i <= numdevs; i++)
296 + unregister_netdev(dev++);
301 +static int __init imq_init_module(void)
305 + if ((err = imq_init_devs()))
307 + if ((err = imq_init_hooks())) {
308 + imq_cleanup_devs();
312 + printk(KERN_INFO "imq driver loaded.\n");
317 +static void __exit imq_cleanup_module(void)
320 + imq_cleanup_devs();
323 +module_init(imq_init_module);
324 +module_exit(imq_cleanup_module);
325 +MODULE_LICENSE("GPL");
326 diff -uNr linux-2.6.4-rc1/drivers/net.orig/Kconfig linux-2.6.4-rc1/drivers/net/Kconfig
327 --- linux-2.6.4-rc1/drivers/net.orig/Kconfig 2004-03-03 03:30:33.000000000 +0100
328 +++ linux-2.6.4-rc1/drivers/net/Kconfig 2004-03-03 03:43:30.237461560 +0100
330 To compile this driver as a module, choose M here: the module
331 will be called eql. If unsure, say N.
334 + tristate "IMQ (intermediate queueing device) support"
335 + depends on NETDEVICES && NETFILTER
337 +	  The imq device(s) are used as placeholders for QoS queueing disciplines.
338 + Every packet entering/leaving the ip stack can be directed through
339 + the imq device where it's enqueued/dequeued to the attached qdisc.
340 + This allows you to treat network devices as classes and distribute
341 + bandwidth among them. Iptables is used to specify through which imq
342 + device, if any, packets travel.
344 + To compile this driver as a module, choose M here: the module
345 + will be called imq. If unsure, say N.
348 tristate "Universal TUN/TAP device driver support"
349 depends on NETDEVICES
350 diff -uNr linux-2.6.4-rc1/drivers/net.orig/Makefile linux-2.6.4-rc1/drivers/net/Makefile
351 --- linux-2.6.4-rc1/drivers/net.orig/Makefile 2004-03-03 03:30:33.000000000 +0100
352 +++ linux-2.6.4-rc1/drivers/net/Makefile 2004-03-03 03:43:30.240461104 +0100
356 obj-$(CONFIG_DUMMY) += dummy.o
357 +obj-$(CONFIG_IMQ) += imq.o
358 obj-$(CONFIG_DE600) += de600.o
359 obj-$(CONFIG_DE620) += de620.o
360 obj-$(CONFIG_LANCE) += lance.o
361 diff -uNr linux-2.6.4-rc1/include.orig/linux/imq.h linux-2.6.4-rc1/include/linux/imq.h
362 --- linux-2.6.4-rc1/include.orig/linux/imq.h 1970-01-01 01:00:00.000000000 +0100
363 +++ linux-2.6.4-rc1/include/linux/imq.h 2004-03-03 03:43:30.264457456 +0100
368 +#define IMQ_MAX_DEVS 16
370 +#define IMQ_F_IFMASK 0x7f
371 +#define IMQ_F_ENQUEUE 0x80
374 diff -uNr linux-2.6.4-rc1/include.orig/linux/netfilter_ipv4/ipt_IMQ.h linux-2.6.4-rc1/include/linux/netfilter_ipv4/ipt_IMQ.h
375 --- linux-2.6.4-rc1/include.orig/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 01:00:00.000000000 +0100
376 +++ linux-2.6.4-rc1/include/linux/netfilter_ipv4/ipt_IMQ.h 2004-03-03 03:43:30.265457304 +0100
381 +struct ipt_imq_info {
382 + unsigned int todev; /* target imq device */
385 +#endif /* _IPT_IMQ_H */
386 diff -uNr linux-2.6.4-rc1/include.orig/linux/pkt_sched.h linux-2.6.4-rc1/include/linux/pkt_sched.h
387 --- linux-2.6.4-rc1/include.orig/linux/pkt_sched.h 2004-02-27 23:21:25.000000000 +0100
388 +++ linux-2.6.4-rc1/include/linux/pkt_sched.h 2004-03-03 03:43:30.297452440 +0100
395 + TCA_SFQ_HASH_CLASSIC,
402 unsigned quantum; /* Bytes per round allocated to flow */
404 __u32 limit; /* Maximal packets in queue */
405 unsigned divisor; /* Hash divisor */
406 unsigned flows; /* Maximal number of flows */
407 + unsigned hash_kind; /* Hash function to use for flow identification */
413 * The only reason for this is efficiency, it is possible
414 * to change these parameters in compile time.
416 + * If you need to play with these values, use esfq.
420 diff -uNr linux-2.6.4-rc1/include.orig/linux/skbuff.h linux-2.6.4-rc1/include/linux/skbuff.h
421 --- linux-2.6.4-rc1/include.orig/linux/skbuff.h 2004-02-27 23:21:03.000000000 +0100
422 +++ linux-2.6.4-rc1/include/linux/skbuff.h 2004-03-03 03:43:30.309450616 +0100
427 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
431 struct sk_buff_head {
432 /* These two members must be first. */
436 unsigned char local_df,
442 #ifdef CONFIG_NET_SCHED
443 __u32 tc_index; /* traffic control index */
445 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
446 + struct nf_info *nf_info;
449 /* These elements must be at the end, see alloc_skb() for details. */
450 unsigned int truesize;
451 diff -uNr linux-2.6.4-rc1/net.orig/core/skbuff.c linux-2.6.4-rc1/net/core/skbuff.c
452 --- linux-2.6.4-rc1/net.orig/core/skbuff.c 2004-02-27 23:21:25.000000000 +0100
453 +++ linux-2.6.4-rc1/net/core/skbuff.c 2004-03-03 03:43:30.316449552 +0100
455 skb_shinfo(skb)->tso_size = 0;
456 skb_shinfo(skb)->tso_segs = 0;
457 skb_shinfo(skb)->frag_list = NULL;
459 +/* probably doomed to failure */
460 +#if defined(CONFIG_IMQ) || defined (CONFIG_IMQ_MODULE)
461 + skb->imq_flags = 0;
462 + skb->nf_info = NULL;
469 #ifdef CONFIG_NET_SCHED
472 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
477 atomic_set(&n->users, 1);
480 #ifdef CONFIG_NET_SCHED
481 new->tc_index = old->tc_index;
483 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
484 + new->imq_flags=old->imq_flags;
485 + new->nf_info=old->nf_info;
487 atomic_set(&new->users, 1);
490 diff -uNr linux-2.6.4-rc1/net.orig/ipv4/netfilter/ipt_IMQ.c linux-2.6.4-rc1/net/ipv4/netfilter/ipt_IMQ.c
491 --- linux-2.6.4-rc1/net.orig/ipv4/netfilter/ipt_IMQ.c 1970-01-01 01:00:00.000000000 +0100
492 +++ linux-2.6.4-rc1/net/ipv4/netfilter/ipt_IMQ.c 2004-03-03 03:43:30.322448640 +0100
494 +/* This target marks packets to be enqueued to an imq device */
495 +#include <linux/module.h>
496 +#include <linux/skbuff.h>
497 +#include <linux/netfilter_ipv4/ip_tables.h>
498 +#include <linux/netfilter_ipv4/ipt_IMQ.h>
499 +#include <linux/imq.h>
501 +static unsigned int imq_target(struct sk_buff **pskb,
502 + const struct net_device *in,
503 + const struct net_device *out,
504 + unsigned int hooknum,
505 + const void *targinfo,
508 + struct ipt_imq_info *mr = (struct ipt_imq_info*)targinfo;
510 + (*pskb)->imq_flags = mr->todev | IMQ_F_ENQUEUE;
511 + (*pskb)->nfcache |= NFC_ALTERED;
513 + return IPT_CONTINUE;
516 +static int imq_checkentry(const char *tablename,
517 + const struct ipt_entry *e,
519 + unsigned int targinfosize,
520 + unsigned int hook_mask)
522 + struct ipt_imq_info *mr;
524 + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_imq_info))) {
525 + printk(KERN_WARNING "IMQ: invalid targinfosize\n");
528 + mr = (struct ipt_imq_info*)targinfo;
530 + if (strcmp(tablename, "mangle") != 0) {
531 + printk(KERN_WARNING
532 + "IMQ: IMQ can only be called from \"mangle\" table, not \"%s\"\n",
537 + if (mr->todev > IMQ_MAX_DEVS) {
538 + printk(KERN_WARNING
539 + "IMQ: invalid device specified, highest is %u\n",
547 +static struct ipt_target ipt_imq_reg = {
549 + .target = imq_target,
550 + .checkentry = imq_checkentry,
554 +static int __init init(void)
556 + if (ipt_register_target(&ipt_imq_reg))
562 +static void __exit fini(void)
564 + ipt_unregister_target(&ipt_imq_reg);
569 +MODULE_LICENSE("GPL");
570 diff -uNr linux-2.6.4-rc1/net.orig/ipv4/netfilter/Kconfig linux-2.6.4-rc1/net/ipv4/netfilter/Kconfig
571 --- linux-2.6.4-rc1/net.orig/ipv4/netfilter/Kconfig 2004-03-03 03:30:33.000000000 +0100
572 +++ linux-2.6.4-rc1/net/ipv4/netfilter/Kconfig 2004-03-03 03:43:30.318449248 +0100
575 To compile it as a module, choose M here. If unsure, say N.
577 +config IP_NF_TARGET_IMQ
578 + tristate "IMQ target support"
579 + depends on IP_NF_IPTABLES
581 + This option adds a `IMQ' target which is used to specify if and
582 + to which imq device packets should get enqueued/dequeued.
584 + To compile it as a module, choose M here. If unsure, say N.
586 config IP_NF_TARGET_TCPMSS
587 tristate "TCPMSS target support"
588 depends on IP_NF_IPTABLES
589 diff -uNr linux-2.6.4-rc1/net.orig/ipv4/netfilter/Makefile linux-2.6.4-rc1/net/ipv4/netfilter/Makefile
590 --- linux-2.6.4-rc1/net.orig/ipv4/netfilter/Makefile 2004-03-03 03:30:33.000000000 +0100
591 +++ linux-2.6.4-rc1/net/ipv4/netfilter/Makefile 2004-03-03 03:43:30.321448792 +0100
593 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
594 obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
595 obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
596 +obj-$(CONFIG_IP_NF_TARGET_IMQ) += ipt_IMQ.o
597 obj-$(CONFIG_IP_NF_TARGET_IPMARK) += ipt_IPMARK.o
598 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
599 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
600 diff -uNr linux-2.6.4-rc1/net.orig/sched/Kconfig linux-2.6.4-rc1/net/sched/Kconfig
601 --- linux-2.6.4-rc1/net.orig/sched/Kconfig 2004-02-27 23:21:28.000000000 +0100
602 +++ linux-2.6.4-rc1/net/sched/Kconfig 2004-03-03 03:43:30.326448032 +0100
604 To compile this code as a module, choose M here: the
605 module will be called sch_sfq.
608 + tristate "ESFQ queue"
609 + depends on NET_SCHED
611 + Say Y here if you want to use the Enhanced Stochastic Fairness
612 + Queueing (ESFQ) packet scheduling algorithm for some of your network
613 + devices or as a leaf discipline for the CBQ scheduling algorithm (see
614 + the top of <file:net/sched/sch_esfq.c> for details and references
615 + about the SFQ algorithm).
617 +	  This is an enhanced SFQ version which allows you to control the
618 + hardcoded values in the SFQ scheduler: queue depth, hash table size,
619 + queues limit. Also adds control to the hash function used to identify
620 + packet flows. Hash by src or dst ip and original sfq hash.
622 + To compile this code as a module, choose M here: the
623 + module will be called sch_esfq.
626 tristate "TEQL queue"
628 diff -uNr linux-2.6.4-rc1/net.orig/sched/Makefile linux-2.6.4-rc1/net/sched/Makefile
629 --- linux-2.6.4-rc1/net.orig/sched/Makefile 2004-02-27 23:21:02.000000000 +0100
630 +++ linux-2.6.4-rc1/net/sched/Makefile 2004-03-03 03:44:30.385317696 +0100
632 obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
633 obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
634 obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
635 +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
636 obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
637 obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
638 obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
639 diff -uNr linux-2.6.4-rc1/net.orig/sched/sch_esfq.c linux-2.6.4-rc1/net/sched/sch_esfq.c
640 --- linux-2.6.4-rc1/net.orig/sched/sch_esfq.c 1970-01-01 01:00:00.000000000 +0100
641 +++ linux-2.6.4-rc1/net/sched/sch_esfq.c 2004-03-03 03:43:30.332447120 +0100
644 + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
646 + * This program is free software; you can redistribute it and/or
647 + * modify it under the terms of the GNU General Public License
648 + * as published by the Free Software Foundation; either version
649 + * 2 of the License, or (at your option) any later version.
651 + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
653 + * Changes: Alexander Atanasov, <alex@ssi.bg>
654 + * Added dynamic depth,limit,divisor,hash_kind options.
655 + * Added dst and src hashes.
658 +#include <linux/config.h>
659 +#include <linux/module.h>
660 +#include <asm/uaccess.h>
661 +#include <asm/system.h>
662 +#include <asm/bitops.h>
663 +#include <linux/types.h>
664 +#include <linux/kernel.h>
665 +#include <linux/sched.h>
666 +#include <linux/string.h>
667 +#include <linux/mm.h>
668 +#include <linux/socket.h>
669 +#include <linux/sockios.h>
670 +#include <linux/in.h>
671 +#include <linux/errno.h>
672 +#include <linux/interrupt.h>
673 +#include <linux/if_ether.h>
674 +#include <linux/inet.h>
675 +#include <linux/netdevice.h>
676 +#include <linux/etherdevice.h>
677 +#include <linux/notifier.h>
678 +#include <linux/init.h>
680 +#include <linux/ipv6.h>
681 +#include <net/route.h>
682 +#include <linux/skbuff.h>
683 +#include <net/sock.h>
684 +#include <net/pkt_sched.h>
687 +/* Stochastic Fairness Queuing algorithm.
688 + For more comments look at sch_sfq.c.
689 + The difference is that you can change limit, depth,
690 + hash table size and choose 3 hash types.
692 + classic: same as in sch_sfq.c
693 + dst: destination IP address
694 + src: source IP address
697 + make sfq_change work.
701 +/* This type should contain at least SFQ_DEPTH*2 values */
702 +typedef unsigned int esfq_index;
710 +struct esfq_sched_data
713 + int perturb_period;
714 + unsigned quantum; /* Allotment per round: MUST BE >= MTU */
717 + unsigned hash_divisor;
718 + unsigned hash_kind;
720 + struct timer_list perturb_timer;
722 + esfq_index tail; /* Index of current slot in round */
723 + esfq_index max_depth; /* Maximal depth */
725 + esfq_index *ht; /* Hash table */
726 + esfq_index *next; /* Active slots link */
727 + short *allot; /* Current allotment per slot */
728 + unsigned short *hash; /* Hash value indexed by slots */
729 + struct sk_buff_head *qs; /* Slot queue */
730 + struct esfq_head *dep; /* Linked list of slots, indexed by depth */
733 +static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h)
735 + int pert = q->perturbation;
738 + h = (h<<pert) ^ (h>>(0x1F - pert));
740 + h = ntohl(h) * 2654435761UL;
741 + return h & (q->hash_divisor-1);
744 +static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1)
746 + int pert = q->perturbation;
748 + /* Have we any rotation primitives? If not, WHY? */
749 + h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
751 + return h & (q->hash_divisor-1);
755 +#define IPPROTO_ESP 50
758 +static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
763 + switch (skb->protocol) {
764 + case __constant_htons(ETH_P_IP):
766 + struct iphdr *iph = skb->nh.iph;
769 + h2 = hs^iph->protocol;
770 + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
771 + (iph->protocol == IPPROTO_TCP ||
772 + iph->protocol == IPPROTO_UDP ||
773 + iph->protocol == IPPROTO_ESP))
774 + h2 ^= *(((u32*)iph) + iph->ihl);
777 + case __constant_htons(ETH_P_IPV6):
779 + struct ipv6hdr *iph = skb->nh.ipv6h;
780 + h = iph->daddr.s6_addr32[3];
781 + hs = iph->saddr.s6_addr32[3];
782 + h2 = hs^iph->nexthdr;
783 + if (iph->nexthdr == IPPROTO_TCP ||
784 + iph->nexthdr == IPPROTO_UDP ||
785 + iph->nexthdr == IPPROTO_ESP)
786 + h2 ^= *(u32*)&iph[1];
790 + h = (u32)(unsigned long)skb->dst;
791 + hs = (u32)(unsigned long)skb->sk;
792 + h2 = hs^skb->protocol;
794 + switch(q->hash_kind)
796 + case TCA_SFQ_HASH_CLASSIC:
797 + return esfq_fold_hash_classic(q, h, h2);
798 + case TCA_SFQ_HASH_DST:
799 + return esfq_hash_u32(q,h);
800 + case TCA_SFQ_HASH_SRC:
801 + return esfq_hash_u32(q,hs);
803 + if (net_ratelimit())
804 + printk(KERN_DEBUG "esfq unknown hash method, fallback to classic\n");
806 + return esfq_fold_hash_classic(q, h, h2);
809 +extern __inline__ void esfq_link(struct esfq_sched_data *q, esfq_index x)
812 + int d = q->qs[x].qlen + q->depth;
815 + n = q->dep[d].next;
816 + q->dep[x].next = n;
817 + q->dep[x].prev = p;
818 + q->dep[p].next = q->dep[n].prev = x;
821 +extern __inline__ void esfq_dec(struct esfq_sched_data *q, esfq_index x)
825 + n = q->dep[x].next;
826 + p = q->dep[x].prev;
827 + q->dep[p].next = n;
828 + q->dep[n].prev = p;
830 + if (n == p && q->max_depth == q->qs[x].qlen + 1)
836 +extern __inline__ void esfq_inc(struct esfq_sched_data *q, esfq_index x)
841 + n = q->dep[x].next;
842 + p = q->dep[x].prev;
843 + q->dep[p].next = n;
844 + q->dep[n].prev = p;
846 + if (q->max_depth < d)
852 +static unsigned int esfq_drop(struct Qdisc *sch)
854 + struct esfq_sched_data *q = qdisc_priv(sch);
855 + esfq_index d = q->max_depth;
856 + struct sk_buff *skb;
858 + /* Queue is full! Find the longest slot and
859 + drop a packet from it */
862 + esfq_index x = q->dep[d+q->depth].next;
863 + skb = q->qs[x].prev;
864 + __skb_unlink(skb, &q->qs[x]);
868 + sch->stats.drops++;
873 + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
874 + d = q->next[q->tail];
875 + q->next[q->tail] = q->next[d];
876 + q->allot[q->next[d]] += q->quantum;
877 + skb = q->qs[d].prev;
878 + __skb_unlink(skb, &q->qs[d]);
882 + q->ht[q->hash[d]] = q->depth;
883 + sch->stats.drops++;
891 +esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
893 + struct esfq_sched_data *q = qdisc_priv(sch);
894 + unsigned hash = esfq_hash(q, skb);
895 + unsigned depth = q->depth;
900 + q->ht[hash] = x = q->dep[depth].next;
903 + __skb_queue_tail(&q->qs[x], skb);
905 + if (q->qs[x].qlen == 1) { /* The flow is new */
906 + if (q->tail == depth) { /* It is the first flow */
909 + q->allot[x] = q->quantum;
911 + q->next[x] = q->next[q->tail];
912 + q->next[q->tail] = x;
916 + if (++sch->q.qlen < q->limit-1) {
917 + sch->stats.bytes += skb->len;
918 + sch->stats.packets++;
923 + return NET_XMIT_CN;
927 +esfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
929 + struct esfq_sched_data *q = qdisc_priv(sch);
930 + unsigned hash = esfq_hash(q, skb);
931 + unsigned depth = q->depth;
936 + q->ht[hash] = x = q->dep[depth].next;
939 + __skb_queue_head(&q->qs[x], skb);
941 + if (q->qs[x].qlen == 1) { /* The flow is new */
942 + if (q->tail == depth) { /* It is the first flow */
945 + q->allot[x] = q->quantum;
947 + q->next[x] = q->next[q->tail];
948 + q->next[q->tail] = x;
952 + if (++sch->q.qlen < q->limit - 1)
955 + sch->stats.drops++;
957 + return NET_XMIT_CN;
963 +static struct sk_buff *
964 +esfq_dequeue(struct Qdisc* sch)
966 + struct esfq_sched_data *q = qdisc_priv(sch);
967 + struct sk_buff *skb;
968 + unsigned depth = q->depth;
969 + esfq_index a, old_a;
971 + /* No active slots */
972 + if (q->tail == depth)
975 + a = old_a = q->next[q->tail];
978 + skb = __skb_dequeue(&q->qs[a]);
982 + /* Is the slot empty? */
983 + if (q->qs[a].qlen == 0) {
989 + q->next[q->tail] = a;
990 + q->allot[a] += q->quantum;
991 + } else if ((q->allot[a] -= skb->len) <= 0) {
994 + q->allot[a] += q->quantum;
1001 +esfq_reset(struct Qdisc* sch)
1003 + struct sk_buff *skb;
1005 + while ((skb = esfq_dequeue(sch)) != NULL)
1009 +static void esfq_perturbation(unsigned long arg)
1011 + struct Qdisc *sch = (struct Qdisc*)arg;
1012 + struct esfq_sched_data *q = qdisc_priv(sch);
1014 + q->perturbation = net_random()&0x1F;
1015 + q->perturb_timer.expires = jiffies + q->perturb_period;
1017 + if (q->perturb_period) {
1018 + q->perturb_timer.expires = jiffies + q->perturb_period;
1019 + add_timer(&q->perturb_timer);
1023 +static int esfq_change(struct Qdisc *sch, struct rtattr *opt)
1025 + struct esfq_sched_data *q = qdisc_priv(sch);
1026 + struct tc_sfq_qopt *ctl = RTA_DATA(opt);
1027 + int old_perturb = q->perturb_period;
1029 + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
1032 + sch_tree_lock(sch);
1033 + q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
1034 + q->perturb_period = ctl->perturb_period*HZ;
1035 +// q->hash_divisor = ctl->divisor;
1036 +// q->tail = q->limit = q->depth = ctl->flows;
1039 + q->limit = min_t(u32, ctl->limit, q->depth);
1041 + if (ctl->hash_kind) {
1042 + q->hash_kind = ctl->hash_kind;
1043 + if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
1044 + q->perturb_period = 0;
1047 + // is sch_tree_lock enough to do this ?
1048 + while (sch->q.qlen >= q->limit-1)
1052 + del_timer(&q->perturb_timer);
1053 + if (q->perturb_period) {
1054 + q->perturb_timer.expires = jiffies + q->perturb_period;
1055 + add_timer(&q->perturb_timer);
1057 + q->perturbation = 0;
1059 + sch_tree_unlock(sch);
1063 +static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
1065 + struct esfq_sched_data *q = qdisc_priv(sch);
1066 + struct tc_sfq_qopt *ctl;
1067 + esfq_index p = ~0UL/2;
1070 + if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
1073 + q->perturb_timer.data = (unsigned long)sch;
1074 + q->perturb_timer.function = esfq_perturbation;
1075 + init_timer(&q->perturb_timer);
1076 + q->perturbation = 0;
1077 + q->hash_kind = TCA_SFQ_HASH_CLASSIC;
1079 + if (opt == NULL) {
1080 + q->quantum = psched_mtu(sch->dev);
1081 + q->perturb_period = 0;
1082 + q->hash_divisor = 1024;
1083 + q->tail = q->limit = q->depth = 128;
1086 + ctl = RTA_DATA(opt);
1087 + q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
1088 + q->perturb_period = ctl->perturb_period*HZ;
1089 + q->hash_divisor = ctl->divisor ? : 1024;
1090 + q->tail = q->limit = q->depth = ctl->flows ? : 128;
1092 + if ( q->depth > p - 1 )
1096 + q->limit = min_t(u32, ctl->limit, q->depth);
1098 + if (ctl->hash_kind) {
1099 + q->hash_kind = ctl->hash_kind;
1102 + if (q->perturb_period) {
1103 + q->perturb_timer.expires = jiffies + q->perturb_period;
1104 + add_timer(&q->perturb_timer);
1108 + q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
1112 + q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
1115 + q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
1119 + q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
1122 + q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
1125 + q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
1129 + for (i=0; i< q->hash_divisor; i++)
1130 + q->ht[i] = q->depth;
1131 + for (i=0; i<q->depth; i++) {
1132 + skb_queue_head_init(&q->qs[i]);
1133 + q->dep[i+q->depth].next = i+q->depth;
1134 + q->dep[i+q->depth].prev = i+q->depth;
1137 + for (i=0; i<q->depth; i++)
1156 +static void esfq_destroy(struct Qdisc *sch)
1158 + struct esfq_sched_data *q = qdisc_priv(sch);
1159 + del_timer(&q->perturb_timer);
1174 +static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
1176 + struct esfq_sched_data *q = qdisc_priv(sch);
1177 + unsigned char *b = skb->tail;
1178 + struct tc_sfq_qopt opt;
1180 + opt.quantum = q->quantum;
1181 + opt.perturb_period = q->perturb_period/HZ;
1183 + opt.limit = q->limit;
1184 + opt.divisor = q->hash_divisor;
1185 + opt.flows = q->depth;
1186 + opt.hash_kind = q->hash_kind;
1188 + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1193 + skb_trim(skb, b - skb->data);
1197 +struct Qdisc_ops esfq_qdisc_ops =
1202 + sizeof(struct esfq_sched_data),
1212 + NULL, /* esfq_change - needs more work */
1218 +int init_module(void)
1220 + return register_qdisc(&esfq_qdisc_ops);
1223 +void cleanup_module(void)
1225 + unregister_qdisc(&esfq_qdisc_ops);
1228 +MODULE_LICENSE("GPL");
1229 diff -uNr linux-2.6.4-rc1/net.orig/sched/sch_generic.c linux-2.6.4-rc1/net/sched/sch_generic.c
1230 --- linux-2.6.4-rc1/net.orig/sched/sch_generic.c 2004-02-27 23:20:56.000000000 +0100
1231 +++ linux-2.6.4-rc1/net/sched/sch_generic.c 2004-03-03 03:43:30.336446512 +0100
1233 #include <linux/skbuff.h>
1234 #include <linux/rtnetlink.h>
1235 #include <linux/init.h>
1236 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1237 +#include <linux/imq.h>
1239 #include <linux/rcupdate.h>
1240 #include <linux/list.h>
1241 #include <net/sock.h>
1242 @@ -135,11 +138,14 @@
1243 spin_unlock(&dev->queue_lock);
1245 if (!netif_queue_stopped(dev)) {
1247 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1248 + if (netdev_nit && !(skb->imq_flags & IMQ_F_ENQUEUE))
1252 dev_queue_xmit_nit(skb, dev);
1254 - ret = dev->hard_start_xmit(skb, dev);
1255 + int ret = dev->hard_start_xmit(skb, dev);
1256 if (ret == NETDEV_TX_OK) {
1258 dev->xmit_lock_owner = -1;