diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/drivers/net/Kconfig linux-2.6.0-test11/drivers/net/Kconfig --- linux-2.6.0-test11.orig/drivers/net/Kconfig 2003-11-30 20:43:42.000000000 +0000 +++ linux-2.6.0-test11/drivers/net/Kconfig 2003-12-02 23:28:09.000000000 +0000 @@ -85,6 +85,20 @@ To compile this driver as a module, choose M here: the module will be called eql. If unsure, say N. +config IMQ + tristate "IMQ (intermediate queueing device) support" + depends on NETDEVICES && NETFILTER + ---help--- + The imq device(s) is used as placeholder for QoS queueing disciplines. + Every packet entering/leaving the ip stack can be directed through + the imq device where it's enqueued/dequeued to the attached qdisc. + This allows you to treat network devices as classes and distribute + bandwidth among them. Iptables is used to specify through which imq + device, if any, packets travel. + + To compile this driver as a module, choose M here: the module + will be called imq. If unsure, say N. + config TUN tristate "Universal TUN/TAP device driver support" depends on NETDEVICES diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/drivers/net/Makefile linux-2.6.0-test11/drivers/net/Makefile --- linux-2.6.0-test11.orig/drivers/net/Makefile 2003-11-30 20:43:42.000000000 +0000 +++ linux-2.6.0-test11/drivers/net/Makefile 2003-12-02 19:54:09.000000000 +0000 @@ -109,6 +109,7 @@ endif obj-$(CONFIG_DUMMY) += dummy.o +obj-$(CONFIG_IMQ) += imq.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_AT1500) += lance.o diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/drivers/net/imq.c linux-2.6.0-test11/drivers/net/imq.c --- linux-2.6.0-test11.orig/drivers/net/imq.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test11/drivers/net/imq.c 2003-12-02 23:52:55.000000000 +0000 @@ -0,0 +1,321 @@ +/* + * Pseudo-driver for the intermediate queue device. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Patrick McHardy, + * + * The first version was written by Martin Devera, + * + * Credits: Jan Rafaj + * - Update patch to 2.4.21 + * Sebastian Strollo + * - Fix "Dead-loop on netdevice imq"-issue + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#endif +#include +#include + +static nf_hookfn imq_nf_hook; + +static struct nf_hook_ops imq_ingress_ipv4 = { + .hook = imq_nf_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_MANGLE + 1 +}; + +static struct nf_hook_ops imq_egress_ipv4 = { + .hook = imq_nf_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_LAST +}; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +static struct nf_hook_ops imq_ingress_ipv6 = { + .hook = imq_nf_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_MANGLE + 1 +}; + +static struct nf_hook_ops imq_egress_ipv6 = { + .hook = imq_nf_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_LAST +}; +#endif + +static unsigned int numdevs = 2; + +MODULE_PARM(numdevs, "i"); +MODULE_PARM_DESC(numdevs, "number of imq devices"); + +static struct net_device *imq_devs; + + +static struct net_device_stats *imq_get_stats(struct net_device *dev) +{ + return (struct net_device_stats *)dev->priv; +} + +/* called for packets kfree'd in qdiscs at places other than enqueue */ +static void imq_skb_destructor(struct sk_buff *skb) +{ + struct nf_info *info = skb->nf_info; + + if (info) { + if (info->indev) + 
dev_put(info->indev); + if (info->outdev) + dev_put(info->outdev); + kfree(info); + } +} + +static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device_stats *stats = (struct net_device_stats*) dev->priv; + + stats->tx_bytes += skb->len; + stats->tx_packets++; + + skb->imq_flags = 0; + skb->destructor = NULL; + + dev->trans_start = jiffies; + nf_reinject(skb, skb->nf_info, NF_ACCEPT); + return 0; +} + +static int imq_nf_queue(struct sk_buff *skb, struct nf_info *info, + void *data) +{ + struct net_device *dev; + struct net_device_stats *stats; + struct sk_buff *skb2 = NULL; + struct Qdisc *q; + unsigned int index = skb->imq_flags&IMQ_F_IFMASK; + int ret = -1; + + if (index > numdevs) + return -1; + + dev = imq_devs + index; + if (!(dev->flags & IFF_UP)) { + skb->imq_flags = 0; + nf_reinject(skb, info, NF_ACCEPT); + return 0; + } + dev->last_rx = jiffies; + + if (skb->destructor) { + skb2 = skb; + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return -1; + } + skb->nf_info = info; + + stats = (struct net_device_stats *)dev->priv; + stats->rx_bytes+= skb->len; + stats->rx_packets++; + + spin_lock_bh(&dev->queue_lock); + q = dev->qdisc; + if (q->enqueue) { + q->enqueue(skb_get(skb), q); + if (skb_shared(skb)) { + skb->destructor = imq_skb_destructor; + kfree_skb(skb); + ret = 0; + } + } + if (spin_is_locked(&dev->xmit_lock)) + netif_schedule(dev); + else + qdisc_run(dev); + spin_unlock_bh(&dev->queue_lock); + + if (skb2) + kfree_skb(ret ? 
skb : skb2); + + return ret; +} + +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff **pskb, + const struct net_device *indev, + const struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + if ((*pskb)->imq_flags & IMQ_F_ENQUEUE) + return NF_QUEUE; + + return NF_ACCEPT; +} + + +static int __init imq_init_hooks(void) +{ + int err; + + if ((err = nf_register_queue_handler(PF_INET, imq_nf_queue, NULL))) + goto err1; + if ((err = nf_register_hook(&imq_ingress_ipv4))) + goto err2; + if ((err = nf_register_hook(&imq_egress_ipv4))) + goto err3; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if ((err = nf_register_queue_handler(PF_INET6, imq_nf_queue, NULL))) + goto err4; + if ((err = nf_register_hook(&imq_ingress_ipv6))) + goto err5; + if ((err = nf_register_hook(&imq_egress_ipv6))) + goto err6; +#endif + + return 0; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +err6: + nf_unregister_hook(&imq_ingress_ipv6); +err5: + nf_unregister_queue_handler(PF_INET6); +err4: + nf_unregister_hook(&imq_egress_ipv4); +#endif +err3: + nf_unregister_hook(&imq_ingress_ipv4); +err2: + nf_unregister_queue_handler(PF_INET); +err1: + return err; +} + +static void __exit imq_unhook(void) +{ + nf_unregister_hook(&imq_ingress_ipv4); + nf_unregister_hook(&imq_egress_ipv4); + nf_unregister_queue_handler(PF_INET); +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + nf_unregister_hook(&imq_ingress_ipv6); + nf_unregister_hook(&imq_egress_ipv6); + nf_unregister_queue_handler(PF_INET6); +#endif +} + +static int __init imq_dev_init(struct net_device *dev) +{ + dev->hard_start_xmit = imq_dev_xmit; + dev->type = ARPHRD_VOID; + dev->mtu = 1500; + dev->tx_queue_len = 30; + dev->flags = IFF_NOARP; + dev->priv = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + if (dev->priv == NULL) + return -ENOMEM; + memset(dev->priv, 0, sizeof(struct net_device_stats)); + dev->get_stats = imq_get_stats; + + return 0; +} + +static void imq_dev_uninit(struct 
net_device *dev)
+{
+	kfree(dev->priv);
+}
+/* Allocate the device array and register numdevs "imq%d" devices. */
+static int __init imq_init_devs(void)
+{
+	struct net_device *dev;
+	int i, err;
+
+	if (!numdevs || numdevs > IMQ_MAX_DEVS) {
+		printk(KERN_ERR "numdevs has to be between 1 and %u\n",
+		       IMQ_MAX_DEVS);
+		return -EINVAL;
+	}
+
+	imq_devs = kmalloc(sizeof(struct net_device) * numdevs, GFP_KERNEL);
+	if (!imq_devs)
+		return -ENOMEM;
+	memset(imq_devs, 0, sizeof(struct net_device) * numdevs);
+
+	/* we start counting at zero */
+	numdevs--;
+
+	for (i = 0, dev = imq_devs; i <= numdevs; i++, dev++) {
+		SET_MODULE_OWNER(dev);
+		strcpy(dev->name, "imq%d");
+		dev->init = imq_dev_init;
+		dev->uninit = imq_dev_uninit;
+
+		if ((err = register_netdev(dev)) < 0)	/* keep the real cause */
+			goto err_register;
+	}
+	return 0;
+
+err_register:
+	for (; i; i--)		/* undo only the devices registered so far */
+		unregister_netdev(--dev);
+	kfree(imq_devs);
+	return err;
+}
+/* Unregister every imq device and free the device array. */
+static void imq_cleanup_devs(void)
+{
+	int i;
+	struct net_device *dev = imq_devs;
+
+	for (i = 0; i <= numdevs; i++)	/* numdevs holds the highest index here */
+		unregister_netdev(dev++);
+
+	kfree(imq_devs);
+}
+/* Module entry: create the devices first, then install the netfilter hooks. */
+static int __init imq_init_module(void)
+{
+	int err;
+
+	if ((err = imq_init_devs()))
+		return err;
+	if ((err = imq_init_hooks())) {
+		imq_cleanup_devs();
+		return err;
+	}
+
+	printk(KERN_INFO "imq driver loaded.\n");
+
+	return 0;
+}
+/* Module exit: remove the hooks before tearing down the devices. */
+static void __exit imq_cleanup_module(void)
+{
+	imq_unhook();
+	imq_cleanup_devs();
+}
+
+module_init(imq_init_module);
+module_exit(imq_cleanup_module);
+MODULE_LICENSE("GPL");
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/include/linux/imq.h linux-2.6.0-test11/include/linux/imq.h
--- linux-2.6.0-test11.orig/include/linux/imq.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test11/include/linux/imq.h	2003-12-02 19:54:09.000000000 +0000
@@ -0,0 +1,9 @@
+#ifndef _IMQ_H
+#define _IMQ_H
+
+#define IMQ_MAX_DEVS	16
+
+#define IMQ_F_IFMASK	0x7f
+#define IMQ_F_ENQUEUE	0x80
+
+#endif /* _IMQ_H */
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/include/linux/netfilter_ipv4/ipt_IMQ.h linux-2.6.0-test11/include/linux/netfilter_ipv4/ipt_IMQ.h
--- 
linux-2.6.0-test11.orig/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test11/include/linux/netfilter_ipv4/ipt_IMQ.h 2003-12-02 19:54:06.000000000 +0000 @@ -0,0 +1,8 @@ +#ifndef _IPT_IMQ_H +#define _IPT_IMQ_H + +struct ipt_imq_info { + unsigned int todev; /* target imq device */ +}; + +#endif /* _IPT_IMQ_H */ diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/include/linux/pkt_sched.h linux-2.6.0-test11/include/linux/pkt_sched.h --- linux-2.6.0-test11.orig/include/linux/pkt_sched.h 2003-11-30 20:43:31.000000000 +0000 +++ linux-2.6.0-test11/include/linux/pkt_sched.h 2003-12-02 19:53:57.000000000 +0000 @@ -157,6 +157,13 @@ /* SFQ section */ +enum +{ + TCA_SFQ_HASH_CLASSIC, + TCA_SFQ_HASH_DST, + TCA_SFQ_HASH_SRC, +}; + struct tc_sfq_qopt { unsigned quantum; /* Bytes per round allocated to flow */ @@ -164,6 +171,7 @@ __u32 limit; /* Maximal packets in queue */ unsigned divisor; /* Hash divisor */ unsigned flows; /* Maximal number of flows */ + unsigned hash_kind; /* Hash function to use for flow identification */ }; /* @@ -173,6 +181,8 @@ * * The only reason for this is efficiency, it is possible * to change these parameters in compile time. + * + * If you need to play with this values use esfq. */ /* RED section */ diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/include/linux/skbuff.h linux-2.6.0-test11/include/linux/skbuff.h --- linux-2.6.0-test11.orig/include/linux/skbuff.h 2003-11-30 20:43:31.000000000 +0000 +++ linux-2.6.0-test11/include/linux/skbuff.h 2003-12-02 19:54:09.000000000 +0000 @@ -112,6 +112,9 @@ #endif #endif +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) +struct nf_info; +#endif struct sk_buff_head { /* These two members must be first. 
*/ @@ -234,6 +237,7 @@ data_len, csum; unsigned char local_df, + imq_flags, cloned, pkt_type, ip_summed; @@ -261,6 +265,9 @@ #ifdef CONFIG_NET_SCHED __u32 tc_index; /* traffic control index */ #endif +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) + struct nf_info *nf_info; +#endif /* These elements must be at the end, see alloc_skb() for details. */ unsigned int truesize; diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/core/skbuff.c linux-2.6.0-test11/net/core/skbuff.c --- linux-2.6.0-test11.orig/net/core/skbuff.c 2003-11-30 20:43:52.000000000 +0000 +++ linux-2.6.0-test11/net/core/skbuff.c 2003-12-02 19:54:09.000000000 +0000 @@ -152,6 +152,13 @@ skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; skb_shinfo(skb)->frag_list = NULL; + +/* probably doomed to failure */ +#if defined(CONFIG_IMQ) || defined (CONFIG_IMQ_MODULE) + skb->imq_flags = 0; + skb->nf_info = NULL; +#endif + out: return skb; nodata: @@ -313,6 +320,10 @@ #ifdef CONFIG_NET_SCHED C(tc_index); #endif +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) + C(imq_flags); + C(nf_info); +#endif C(truesize); atomic_set(&n->users, 1); C(head); @@ -368,6 +379,10 @@ #ifdef CONFIG_NET_SCHED new->tc_index = old->tc_index; #endif +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE) + new->imq_flags=old->imq_flags; + new->nf_info=old->nf_info; +#endif atomic_set(&new->users, 1); } diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/ipv4/netfilter/Kconfig linux-2.6.0-test11/net/ipv4/netfilter/Kconfig --- linux-2.6.0-test11.orig/net/ipv4/netfilter/Kconfig 2003-11-30 20:43:52.000000000 +0000 +++ linux-2.6.0-test11/net/ipv4/netfilter/Kconfig 2003-12-02 19:54:06.000000000 +0000 @@ -501,6 +501,15 @@ To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_IMQ + tristate "IMQ target support" + depends on IP_NF_IPTABLES + ---help--- + This option adds a `IMQ' target which is used to specify if and + to which imq device packets should get enqueued/dequeued. 
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 config IP_NF_TARGET_TCPMSS
 	tristate "TCPMSS target support"
 	depends on IP_NF_IPTABLES
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/ipv4/netfilter/Makefile linux-2.6.0-test11/net/ipv4/netfilter/Makefile
--- linux-2.6.0-test11.orig/net/ipv4/netfilter/Makefile	2003-11-30 20:43:52.000000000 +0000
+++ linux-2.6.0-test11/net/ipv4/netfilter/Makefile	2003-12-02 19:54:06.000000000 +0000
@@ -72,6 +72,7 @@
 obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
 obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
 obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o
+obj-$(CONFIG_IP_NF_TARGET_IMQ) += ipt_IMQ.o
 obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/ipv4/netfilter/ipt_IMQ.c linux-2.6.0-test11/net/ipv4/netfilter/ipt_IMQ.c
--- linux-2.6.0-test11.orig/net/ipv4/netfilter/ipt_IMQ.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.0-test11/net/ipv4/netfilter/ipt_IMQ.c	2003-12-03 00:01:18.000000000 +0000
@@ -0,0 +1,85 @@
+/* This target marks packets to be enqueued to an imq device */
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Store the rule's device index together with IMQ_F_ENQUEUE in the
+ * packet's imq_flags and return IPT_CONTINUE.
+ */
+static unsigned int imq_target(struct sk_buff **pskb,
+			       const struct net_device *in,
+			       const struct net_device *out,
+			       unsigned int hooknum,
+			       const void *targinfo,
+			       void *userinfo)
+{
+	struct ipt_imq_info *mr = (struct ipt_imq_info*)targinfo;
+
+	(*pskb)->imq_flags = mr->todev | IMQ_F_ENQUEUE;
+	(*pskb)->nfcache |= NFC_ALTERED;
+
+	return IPT_CONTINUE;
+}
+
+/*
+ * Validate an IMQ rule.  The target data must have the expected size,
+ * the rule must be in the "mangle" table and the device index must be
+ * one of 0 .. IMQ_MAX_DEVS - 1.  Returns 1 to accept, 0 to reject.
+ */
+static int imq_checkentry(const char *tablename,
+			  const struct ipt_entry *e,
+			  void *targinfo,
+			  unsigned int targinfosize,
+			  unsigned int hook_mask)
+{
+	struct ipt_imq_info *mr;
+
+	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_imq_info))) {
+		printk(KERN_WARNING "IMQ: invalid targinfosize\n");
+		return 0;
+	}
+	mr = (struct ipt_imq_info*)targinfo;
+
+	if (strcmp(tablename, "mangle") != 0) {
+		printk(KERN_WARNING
+		       "IMQ: IMQ can only be called from \"mangle\" table, not \"%s\"\n",
+		       tablename);
+		return 0;
+	}
+
+	/* devices are numbered from zero, so IMQ_MAX_DEVS itself is out of range */
+	if (mr->todev > IMQ_MAX_DEVS - 1) {
+		printk(KERN_WARNING
+		       "IMQ: invalid device specified, highest is %u\n",
+		       IMQ_MAX_DEVS - 1);
+		return 0;
+	}
+	return 1;
+}
+
+static struct ipt_target ipt_imq_reg = {
+	.name		= "IMQ",
+	.target		= imq_target,
+	.checkentry	= imq_checkentry,
+	.me		= THIS_MODULE
+};
+
+static int __init init(void)
+{
+	if (ipt_register_target(&ipt_imq_reg))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ipt_unregister_target(&ipt_imq_reg);
+}
+
+module_init(init);
+module_exit(fini);
+MODULE_LICENSE("GPL");
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/sched/Kconfig linux-2.6.0-test11/net/sched/Kconfig
--- linux-2.6.0-test11.orig/net/sched/Kconfig	2003-11-30 20:43:54.000000000 +0000
+++ linux-2.6.0-test11/net/sched/Kconfig	2003-12-02 19:53:57.000000000 +0000
@@ -105,6 +105,24 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_sfq.
 
+config NET_SCH_ESFQ
+	tristate "ESFQ queue"
+	depends on NET_SCHED
+	---help---
+	  Say Y here if you want to use the Enhanced Stochastic Fairness
+	  Queueing (ESFQ) packet scheduling algorithm for some of your network
+	  devices or as a leaf discipline for the CBQ scheduling algorithm (see
+	  the top of <file:net/sched/sch_esfq.c> for details and references
+	  about the SFQ algorithm).
+
+	  This is an enhanced SFQ version which allows you to control the
+	  hardcoded values in the SFQ scheduler: queue depth, hash table size,
+	  queues limit. Also adds control to the hash function used to identify
+	  packet flows. Hash by src or dst ip and original sfq hash.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_esfq.
+ config NET_SCH_TEQL tristate "TEQL queue" depends on NET_SCHED diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/sched/Makefile linux-2.6.0-test11/net/sched/Makefile --- linux-2.6.0-test11.orig/net/sched/Makefile 2003-11-30 20:43:54.000000000 +0000 +++ linux-2.6.0-test11/net/sched/Makefile 2003-12-02 19:53:57.000000000 +0000 @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o +obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/sched/sch_api.c linux-2.6.0-test11/net/sched/sch_api.c --- linux-2.6.0-test11.orig/net/sched/sch_api.c 2003-11-30 20:43:54.000000000 +0000 +++ linux-2.6.0-test11/net/sched/sch_api.c 2003-12-02 19:53:57.000000000 +0000 @@ -1235,6 +1235,9 @@ #ifdef CONFIG_NET_SCH_SFQ INIT_QDISC(sfq); #endif +#ifdef CONFIG_NET_SCH_ESFQ + INIT_QDISC(esfq); +#endif #ifdef CONFIG_NET_SCH_TBF INIT_QDISC(tbf); #endif diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/sched/sch_esfq.c linux-2.6.0-test11/net/sched/sch_esfq.c --- linux-2.6.0-test11.orig/net/sched/sch_esfq.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test11/net/sched/sch_esfq.c 2003-12-03 00:18:29.000000000 +0000 @@ -0,0 +1,588 @@ +/* + * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, + * + * Changes: Alexander Atanasov, + * Added dynamic depth,limit,divisor,hash_kind options. + * Added dst and src hashes. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Stochastic Fairness Queuing algorithm. + For more comments look at sch_sfq.c. + The difference is that you can change limit, depth, + hash table size and choose 3 hash types. + + classic: same as in sch_sfq.c + dst: destination IP address + src: source IP address + + TODO: + make sfq_change work. +*/ + + +/* This type should contain at least SFQ_DEPTH*2 values */ +typedef unsigned int esfq_index; + +struct esfq_head +{ + esfq_index next; + esfq_index prev; +}; + +struct esfq_sched_data +{ +/* Parameters */ + int perturb_period; + unsigned quantum; /* Allotment per round: MUST BE >= MTU */ + int limit; + unsigned depth; + unsigned hash_divisor; + unsigned hash_kind; +/* Variables */ + struct timer_list perturb_timer; + int perturbation; + esfq_index tail; /* Index of current slot in round */ + esfq_index max_depth; /* Maximal depth */ + + esfq_index *ht; /* Hash table */ + esfq_index *next; /* Active slots link */ + short *allot; /* Current allotment per slot */ + unsigned short *hash; /* Hash value indexed by slots */ + struct sk_buff_head *qs; /* Slot queue */ + struct esfq_head *dep; /* Linked list of slots, indexed by depth */ +}; + +static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h) +{ + int pert = q->perturbation; + + if (pert) + h = (h<>(0x1F - pert)); + + h = ntohl(h) * 2654435761UL; + return h & (q->hash_divisor-1); +} + +static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1) +{ + int pert = q->perturbation; + + /* Have we any rotation primitives? If not, WHY? 
*/ + h ^= (h1<>(0x1F - pert)); + h ^= h>>10; + return h & (q->hash_divisor-1); +} + +#ifndef IPPROTO_ESP +#define IPPROTO_ESP 50 +#endif + +static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb) +{ + u32 h, h2; + u32 hs; + + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + { + struct iphdr *iph = skb->nh.iph; + h = iph->daddr; + hs = iph->saddr; + h2 = hs^iph->protocol; + if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && + (iph->protocol == IPPROTO_TCP || + iph->protocol == IPPROTO_UDP || + iph->protocol == IPPROTO_ESP)) + h2 ^= *(((u32*)iph) + iph->ihl); + break; + } + case __constant_htons(ETH_P_IPV6): + { + struct ipv6hdr *iph = skb->nh.ipv6h; + h = iph->daddr.s6_addr32[3]; + hs = iph->saddr.s6_addr32[3]; + h2 = hs^iph->nexthdr; + if (iph->nexthdr == IPPROTO_TCP || + iph->nexthdr == IPPROTO_UDP || + iph->nexthdr == IPPROTO_ESP) + h2 ^= *(u32*)&iph[1]; + break; + } + default: + h = (u32)(unsigned long)skb->dst; + hs = (u32)(unsigned long)skb->sk; + h2 = hs^skb->protocol; + } + switch(q->hash_kind) + { + case TCA_SFQ_HASH_CLASSIC: + return esfq_fold_hash_classic(q, h, h2); + case TCA_SFQ_HASH_DST: + return esfq_hash_u32(q,h); + case TCA_SFQ_HASH_SRC: + return esfq_hash_u32(q,hs); + default: + if (net_ratelimit()) + printk(KERN_DEBUG "esfq unknown hash method, fallback to classic\n"); + } + return esfq_fold_hash_classic(q, h, h2); +} + +extern __inline__ void esfq_link(struct esfq_sched_data *q, esfq_index x) +{ + esfq_index p, n; + int d = q->qs[x].qlen + q->depth; + + p = d; + n = q->dep[d].next; + q->dep[x].next = n; + q->dep[x].prev = p; + q->dep[p].next = q->dep[n].prev = x; +} + +extern __inline__ void esfq_dec(struct esfq_sched_data *q, esfq_index x) +{ + esfq_index p, n; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + + if (n == p && q->max_depth == q->qs[x].qlen + 1) + q->max_depth--; + + esfq_link(q, x); +} + +extern __inline__ void esfq_inc(struct esfq_sched_data *q, esfq_index x) 
+{ + esfq_index p, n; + int d; + + n = q->dep[x].next; + p = q->dep[x].prev; + q->dep[p].next = n; + q->dep[n].prev = p; + d = q->qs[x].qlen; + if (q->max_depth < d) + q->max_depth = d; + + esfq_link(q, x); +} + +static unsigned int esfq_drop(struct Qdisc *sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + esfq_index d = q->max_depth; + struct sk_buff *skb; + + /* Queue is full! Find the longest slot and + drop a packet from it */ + + if (d > 1) { + esfq_index x = q->dep[d+q->depth].next; + skb = q->qs[x].prev; + __skb_unlink(skb, &q->qs[x]); + kfree_skb(skb); + esfq_dec(q, x); + sch->q.qlen--; + sch->stats.drops++; + return 1; + } + + if (d == 1) { + /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ + d = q->next[q->tail]; + q->next[q->tail] = q->next[d]; + q->allot[q->next[d]] += q->quantum; + skb = q->qs[d].prev; + __skb_unlink(skb, &q->qs[d]); + kfree_skb(skb); + esfq_dec(q, d); + sch->q.qlen--; + q->ht[q->hash[d]] = q->depth; + sch->stats.drops++; + return 1; + } + + return 0; +} + +static int +esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + unsigned hash = esfq_hash(q, skb); + unsigned depth = q->depth; + esfq_index x; + + x = q->ht[hash]; + if (x == depth) { + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } + __skb_queue_tail(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == depth) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } + if (++sch->q.qlen < q->limit-1) { + sch->stats.bytes += skb->len; + sch->stats.packets++; + return 0; + } + + esfq_drop(sch); + return NET_XMIT_CN; +} + +static int +esfq_requeue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + unsigned hash = esfq_hash(q, 
skb); + unsigned depth = q->depth; + esfq_index x; + + x = q->ht[hash]; + if (x == depth) { + q->ht[hash] = x = q->dep[depth].next; + q->hash[x] = hash; + } + __skb_queue_head(&q->qs[x], skb); + esfq_inc(q, x); + if (q->qs[x].qlen == 1) { /* The flow is new */ + if (q->tail == depth) { /* It is the first flow */ + q->tail = x; + q->next[x] = x; + q->allot[x] = q->quantum; + } else { + q->next[x] = q->next[q->tail]; + q->next[q->tail] = x; + q->tail = x; + } + } + if (++sch->q.qlen < q->limit - 1) + return 0; + + sch->stats.drops++; + esfq_drop(sch); + return NET_XMIT_CN; +} + + + + +static struct sk_buff * +esfq_dequeue(struct Qdisc* sch) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + struct sk_buff *skb; + unsigned depth = q->depth; + esfq_index a, old_a; + + /* No active slots */ + if (q->tail == depth) + return NULL; + + a = old_a = q->next[q->tail]; + + /* Grab packet */ + skb = __skb_dequeue(&q->qs[a]); + esfq_dec(q, a); + sch->q.qlen--; + + /* Is the slot empty? 
*/ + if (q->qs[a].qlen == 0) { + a = q->next[a]; + if (a == old_a) { + q->tail = depth; + return skb; + } + q->next[q->tail] = a; + q->allot[a] += q->quantum; + } else if ((q->allot[a] -= skb->len) <= 0) { + q->tail = a; + a = q->next[a]; + q->allot[a] += q->quantum; + } + + return skb; +} + +static void +esfq_reset(struct Qdisc* sch) +{ + struct sk_buff *skb; + + while ((skb = esfq_dequeue(sch)) != NULL) + kfree_skb(skb); +} + +static void esfq_perturbation(unsigned long arg) +{ + struct Qdisc *sch = (struct Qdisc*)arg; + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + + q->perturbation = net_random()&0x1F; + q->perturb_timer.expires = jiffies + q->perturb_period; + + if (q->perturb_period) { + q->perturb_timer.expires = jiffies + q->perturb_period; + add_timer(&q->perturb_timer); + } +} + +static int esfq_change(struct Qdisc *sch, struct rtattr *opt) +{ + struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data; + struct tc_sfq_qopt *ctl = RTA_DATA(opt); + int old_perturb = q->perturb_period; + + if (opt->rta_len < RTA_LENGTH(sizeof(*ctl))) + return -EINVAL; + + sch_tree_lock(sch); + q->quantum = ctl->quantum ? : psched_mtu(sch->dev); + q->perturb_period = ctl->perturb_period*HZ; +// q->hash_divisor = ctl->divisor; +// q->tail = q->limit = q->depth = ctl->flows; + + if (ctl->limit) + q->limit = min_t(u32, ctl->limit, q->depth); + + if (ctl->hash_kind) { + q->hash_kind = ctl->hash_kind; + if (q->hash_kind != TCA_SFQ_HASH_CLASSIC) + q->perturb_period = 0; + } + + // is sch_tree_lock enough to do this ? 
+	while (sch->q.qlen >= q->limit-1)
+		esfq_drop(sch);
+
+	if (old_perturb)
+		del_timer(&q->perturb_timer);
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	} else {
+		q->perturbation = 0;
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+/* Parse the options (defaults when opt is NULL) and allocate the flow tables. */
+static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	struct tc_sfq_qopt *ctl;
+	esfq_index p = ~0U/2;	/* esfq_index is unsigned int; ~0UL/2 truncated to ~0U on LP64 */
+	int i;
+
+	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = esfq_perturbation;
+	init_timer(&q->perturb_timer);
+	q->perturbation = 0;
+	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+	q->max_depth = 0;
+	if (opt == NULL) {
+		q->quantum = psched_mtu(sch->dev);
+		q->perturb_period = 0;
+		q->hash_divisor = 1024;
+		q->tail = q->limit = q->depth = 128;
+
+	} else {
+		ctl = RTA_DATA(opt);
+		q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+		q->perturb_period = ctl->perturb_period*HZ;
+		q->hash_divisor = ctl->divisor ? : 1024;
+		q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+		if ( q->depth > p - 1 )	/* dep[] holds 1+2*depth entries */
+			return -EINVAL;
+
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, q->depth);
+
+		if (ctl->hash_kind) {
+			q->hash_kind = ctl->hash_kind;
+		}
+	}
+
+	q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
+
+	q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+
+	q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
+	for (i=0; i< q->hash_divisor; i++)
+		q->ht[i] = q->depth;
+	for (i=0; i<q->depth; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+q->depth].next = i+q->depth;
+		q->dep[i+q->depth].prev = i+q->depth;
+	}
+
+	for (i=0; i<q->depth; i++)
+		esfq_link(q, i);
+	/* Arm the timer only after every allocation succeeded: err_case does not cancel it. */
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+	MOD_INC_USE_COUNT;
+	return 0;
+err_case:	/* NOTE(review): relies on unset pointers in sch->data being NULL - confirm */
+	if (q->ht)
+		kfree(q->ht);
+	if (q->dep)
+		kfree(q->dep);
+	if (q->next)
+		kfree(q->next);
+	if (q->allot)
+		kfree(q->allot);
+	if (q->hash)
+		kfree(q->hash);
+	if (q->qs)
+		kfree(q->qs);
+	return -ENOBUFS;
+}
+/* Stop the perturbation timer and free the tables allocated by esfq_init. */
+static void esfq_destroy(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	del_timer(&q->perturb_timer);
+	if(q->ht)
+		kfree(q->ht);
+	if(q->dep)
+		kfree(q->dep);
+	if(q->next)
+		kfree(q->next);
+	if(q->allot)
+		kfree(q->allot);
+	if(q->hash)
+		kfree(q->hash);
+	if(q->qs)
+		kfree(q->qs);
+	MOD_DEC_USE_COUNT;
+}
+/* Copy the current parameters into a TCA_OPTIONS attribute on skb. */
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	unsigned char *b = skb->tail;
+	struct tc_sfq_qopt opt;
+
+	opt.quantum = q->quantum;
+	opt.perturb_period = q->perturb_period/HZ;	/* stored in jiffies, reported in seconds */
+
+	opt.limit = q->limit;
+	opt.divisor = q->hash_divisor;
+	opt.flows = q->depth;
+	opt.hash_kind = q->hash_kind;
+
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+struct Qdisc_ops esfq_qdisc_ops =
+{
+	NULL,
+	NULL,
+	"esfq",
+	sizeof(struct esfq_sched_data),
+
+	esfq_enqueue,
+	esfq_dequeue,
+	esfq_requeue,
+	esfq_drop,
+
+	esfq_init,
+	esfq_reset,
+	esfq_destroy,
+	NULL, /* esfq_change - needs more work */
+
+	esfq_dump,
+};
+
+#ifdef MODULE
+int init_module(void)
+{
+	return register_qdisc(&esfq_qdisc_ops);
+}
+
+void cleanup_module(void)
+{
+	unregister_qdisc(&esfq_qdisc_ops);
+}
+#endif
+MODULE_LICENSE("GPL");
diff -u -U 2 -r -N -d linux-2.6.0-test11.orig/net/sched/sch_generic.c linux-2.6.0-test11/net/sched/sch_generic.c
--- linux-2.6.0-test11.orig/net/sched/sch_generic.c	2003-11-30 20:43:54.000000000 +0000
+++ linux-2.6.0-test11/net/sched/sch_generic.c	2003-12-02 19:54:09.000000000 +0000
@@ -30,6 +30,9 @@
 #include
 #include
 #include
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include
+#endif
 #include
 #include
 
@@ -90,7 +93,11 @@
 			spin_unlock(&dev->queue_lock);
 
 			if (!netif_queue_stopped(dev)) {
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+				if (netdev_nit && !(skb->imq_flags & IMQ_F_ENQUEUE))
+#else
 				if (netdev_nit)
+#endif
 					dev_queue_xmit_nit(skb, dev);
 
 				if (dev->hard_start_xmit(skb, dev) == 0) {